/*****************************************************************************
 *                                McPAT
 *                      SOFTWARE LICENSE AGREEMENT
 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
 *                          All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
 *
 ***************************************************************************/
/********************************************************************
*      Modified by:												   *
*      Jingwen Leng, University of Texas, Austin                   *
*      Syed Gilani, University of Wisconsin–Madison                *
*      Tayler Hetherington, University of British Columbia         *
*      Ahmed ElTantawy, University of British Columbia             *
********************************************************************/

#include "io.h"
#include "parameter.h"
#include "const.h"
#include "cacti/basic_circuit.h"
#include <iostream>
#include <algorithm>
#include "XML_Parse.h"
#include <string>
#include <cmath>
#include <assert.h>
#include "core.h"
//#include "globalvar.h"
//double exClockRate;
//*********************
//Operand collector (OC) modelling (Syed Gilani)
//*********************
//The OCs are modelled after the GPGPU-Sim v3.x documentation and
//NVIDIA patents.
//The OC model needs the following GPGPU-Sim config options:
//-gpgpu_num_reg_banks                    8 # Number of register banks (default = 8)
//-gpgpu_reg_bank_use_warp_id                    0 # Use warp ID in mapping registers to banks (default = off)
//-gpgpu_operand_collector_num_units_sp                    6 # number of collector units (default = 4)
//-gpgpu_operand_collector_num_units_sfu                    8 # number of collector units (default = 4)
//-gpgpu_operand_collector_num_units_mem                    2 # number of collector units (default = 2)
//-gpgpu_operand_collector_num_units_gen                    0 # number of collector units (default = 0)
//-gpgpu_operand_collector_num_in_ports_sp                    1 # number of collector unit in ports (default = 1)
//-gpgpu_operand_collector_num_in_ports_sfu                    1 # number of collector unit in ports (default = 1)
//-gpgpu_operand_collector_num_in_ports_mem                    1 # number of collector unit in ports (default = 1)
//-gpgpu_operand_collector_num_in_ports_gen                    0 # number of collector unit in ports (default = 0)
//-gpgpu_operand_collector_num_out_ports_sp                    1 # number of collector unit out ports (default = 1)
//-gpgpu_operand_collector_num_out_ports_sfu                    1 # number of collector unit out ports (default = 1)
//-gpgpu_operand_collector_num_out_ports_mem                    1 # number of collector unit out ports (default = 1)
//-gpgpu_operand_collector_num_out_ports_gen                    0 # number of collector unit out ports (default = 0)

//The total number of collector units and their input ports, and the number of register file banks
//determine the crossbar size. 

/*
 * InstFetchU constructor: sizes the structures of one core's instruction
 * fetch unit and accumulates their areas into this unit's area.
 *
 * Structures are built in this order:
 *   1. instruction cache array              (icache.caches)
 *   2. icache miss buffer                   (icache.missb)
 *   3. icache fill buffer                   (icache.ifb)
 *   4. icache prefetch buffer               (icache.prefetchb)
 *   5. instruction buffer                   (IB)
 *   6. branch target buffer and predictor   (BTB, BPT) - only when
 *      coredynp.predictionW > 0
 *   7. opcode / operand / misc decoders     (ID_inst, ID_operand, ID_misc)
 *
 * interface_ip is initialised from *interface_ip_ and then modified in place
 * before each structure is constructed. Fields that a section does not
 * reassign keep whatever the previous section left in them, so the order of
 * the statements below matters.
 *
 * Parameters:
 *   XML_interface  parsed configuration, read through XML->sys...
 *   ithCore_       index used for XML->sys.core[...]
 *   interface_ip_  array parameters used to initialise interface_ip
 *   dyn_p_         core parameters stored in coredynp
 *   exist_         when false, nothing is modelled and the constructor
 *                  returns right after the member initialisers
 */
InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 IB  (0),
 BTB (0),
 ID_inst  (0),
 ID_operand  (0),
 ID_misc  (0),
 exist(exist_)
{
	  //BPT is only allocated further down when coredynp.predictionW>0. Zero it
	  //up front, like the other owned pointers, so it never holds an
	  //indeterminate value when the unit does not exist or has no predictor.
	  BPT = 0;

	  if (!exist) return;
	  int  idx, tag, data, size, line, assoc, banks;
	  //debug is always false here, so every "debug?a:b" below evaluates to b.
	  bool debug= false, is_default = true;

	  clockRate = coredynp.clockRate;
	  executionTime = coredynp.executionTime;

	  cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7];
	  //Assuming all L1 caches are virtually idxed physically tagged.
	  //cache
	  //icache_config[] as used below: [0] size, [1] line size, [2] associativity,
	  //[3] banks, [4] throughput and [5] latency (both divided by clockRate).

	  size                             = (int)XML->sys.core[ithCore].icache.icache_config[0];
	  line                             = (int)XML->sys.core[ithCore].icache.icache_config[1];
	  assoc                            = (int)XML->sys.core[ithCore].icache.icache_config[2];
	  banks                            = (int)XML->sys.core[ithCore].icache.icache_config[3];
	  //NOTE: size/line/assoc is an integer division.
	  idx    					 	   = debug?9:int(ceil(log2(size/line/assoc)));
	  //tag = physical address bits - index bits - line offset bits + extra tag bits
	  tag							   = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.cache_sz            = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0];
	  interface_ip.line_sz             = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1];
	  interface_ip.assoc               = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2];
	  interface_ip.nbanks              = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3];
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5];
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
	  interface_ip.latency             = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
	  interface_ip.is_cache			 = true;
	  interface_ip.pure_cam			 = false;
	  interface_ip.pure_ram			 = false;
	//  interface_ip.obj_func_dyn_energy = 0;
	//  interface_ip.obj_func_dyn_power  = 0;
	//  interface_ip.obj_func_leak_power = 0;
	//  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty);
	  scktRatio = g_tp.sckt_co_eff;
	  chip_PR_overhead = g_tp.chip_layout_overhead;
	  macro_PR_overhead = g_tp.macro_layout_overhead;
	  icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area);
	  area.set_area(area.get_area()+ icache.caches->local_result.area);
	  //output_data_csv(icache.caches.local_result);


	  /*
	   *iCache controllers
	   *miss buffer Each MSHR contains enough state
	   *to handle one or more accesses of any type to a single memory line.
	   *Due to the generality of the MSHR mechanism,
	   *the amount of state involved is non-trivial:
	   *including the address, pointers to the cache entry and destination register,
	   *written data, and various other pieces of state.
	   */
	  interface_ip.num_search_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
	  //entry payload in bits: address + ceil(log2(size/line)) bits + one icache line
	  data							   = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
	  //buffer_sizes[0] entries of line_sz bytes each
	  interface_ip.cache_sz            = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz;
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 0;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
	  icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area);
	  area.set_area(area.get_area()+ icache.missb->local_result.area);
	  //output_data_csv(icache.missb.local_result);

	  //fill buffer: buffer_sizes[1] entries, each one icache line wide
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
	  data							   = icache.caches->l_ip.line_sz;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
	  interface_ip.cache_sz            = data*XML->sys.core[ithCore].icache.buffer_sizes[1];
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 0;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
	  icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area);
	  area.set_area(area.get_area()+ icache.ifb->local_result.area);
	  //output_data_csv(icache.ifb.local_result);

	  //prefetch buffer: buffer_sizes[2] entries, each one icache line wide
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide whether to merge.
	  data							   = icache.caches->l_ip.line_sz;//separate queue to prevent cache pollution.
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
	  interface_ip.cache_sz            = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz;
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 0;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
	  icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area);
	  area.set_area(area.get_area()+ icache.prefetchb->local_result.area);
	  //output_data_csv(icache.prefetchb.local_result);

	  //Instruction buffer: a pure RAM whose entry holds instruction_length*peak_issue_width bits
	  data							   = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads timing sharing the instruction buffer.
	  interface_ip.is_cache			   = false;
	  interface_ip.pure_ram            = true;
	  interface_ip.pure_cam            = false;
	  interface_ip.line_sz             = int(ceil(data/8.0));
	  //total size is threads*entries*line_sz, but never smaller than 64
	  interface_ip.cache_sz            = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64?
			                             XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64;
	  interface_ip.assoc               = 1;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 0;
	  interface_ip.throughput          = 1.0/clockRate;
	  interface_ip.latency             = 1.0/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions.
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  IB->area.set_area(IB->area.get_area()+ IB->local_result.area);
	  area.set_area(area.get_area()+ IB->local_result.area);
	  //output_data_csv(IB.IB.local_result);

	  //	  inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width;
	  //	  inst_decoder.init_decoder(is_default, &interface_ip);
	  //	  inst_decoder.full_decoder_power();

	  if (coredynp.predictionW>0)
	  {
		  /*
		   * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged
		   * It is only a cache without all the buffers in the cache controller since it is more like a
		   * look up table than a cache with cache controller. When access miss, no load from other places
		   * such as main memory (not actively fill the misses), it is passively updated under these circumstances:
		   * 1)  when BPT@ID stage finds out current is a taken branch while BTB missed
		   * 2)  When BPT@ID stage predicts differently than BTB
		   * 3)  When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid)
		   * 4)  when EXEU find out wrong target has been provided from BTB.
		   *
		   */
		  size                             = XML->sys.core[ithCore].BTB.BTB_config[0];
		  line                             = XML->sys.core[ithCore].BTB.BTB_config[1];
		  assoc                            = XML->sys.core[ithCore].BTB.BTB_config[2];
		  banks                            = XML->sys.core[ithCore].BTB.BTB_config[3];
		  //No index width is computed here: the BTB tag below uses the whole
		  //virtual address plus thread-id bits (the older formula that
		  //subtracted idx is kept as a comment for reference).
//    	  tag							   = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
		  tag							   = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
		  interface_ip.is_cache			   = true;
		  interface_ip.pure_ram            = false;
		  interface_ip.pure_cam            = false;
		  interface_ip.specific_tag        = 1;
		  interface_ip.tag_w               = tag;
		  interface_ip.cache_sz            = debug?32768:size;
		  interface_ip.line_sz             = debug?64:line;
		  interface_ip.assoc               = debug?8:assoc;
		  interface_ip.nbanks              = debug?1:banks;
		  interface_ip.out_w               = interface_ip.line_sz*8;
		  interface_ip.access_mode         = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
		  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate;
		  interface_ip.latency             = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate;
		  interface_ip.obj_func_dyn_energy = 0;
		  interface_ip.obj_func_dyn_power  = 0;
		  interface_ip.obj_func_leak_power = 0;
		  interface_ip.obj_func_cycle_t    = 1;
		  interface_ip.num_rw_ports    = 1;
		  interface_ip.num_rd_ports    = coredynp.predictionW;
		  interface_ip.num_wr_ports    = coredynp.predictionW;
		  interface_ip.num_se_rd_ports = 0;
		  BTB = new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty);
		  BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area);
		  area.set_area(area.get_area()+ BTB->local_result.area);
		  ///cout<<"area="<<area<<endl;

		  //The predictor receives interface_ip as left by the BTB setup above.
		  BPT = new BranchPredictor(XML, ithCore, &interface_ip,coredynp);
		  area.set_area(area.get_area()+ BPT->area.get_area());
	  }

	  //Three decoders: opcode field, operand (architectural register) field, and an 8-bit misc field.
	  ID_inst = new inst_decoder(is_default, &interface_ip,
			  coredynp.opcode_length, 1/*Decoder should not know how many by itself*/,
			  coredynp.x86,
			  Core_device, coredynp.core_ty);

	  ID_operand = new inst_decoder(is_default, &interface_ip,
			  coredynp.arch_ireg_width, 1,
			  coredynp.x86,
			  Core_device, coredynp.core_ty);

	  ID_misc = new inst_decoder(is_default, &interface_ip,
			  8/* Prefix field etc upto 14B*/, 1,
			  coredynp.x86,
			  Core_device, coredynp.core_ty);
	  //TODO: X86 decoder should decode the inst in cyclic mode under the control of sequencer.
	  //So the dynamic power should be multiplied by a few times.
	  //The combined decoder area is counted once per decode slot (coredynp.decodeW).
	  area.set_area(area.get_area()+ (ID_inst->area.get_area()
			  +ID_operand->area.get_area()
			  +ID_misc->area.get_area())*coredynp.decodeW);

}


/*
 * BranchPredictor constructor: sizes the predictor tables and accumulates
 * their areas into this component's area.
 *
 * Tables are built in this order: global predictor, level-1 local predictor,
 * level-2 local predictor, chooser, and return address stack (RAS). The RAS
 * area is counted once per hardware thread (coredynp.num_hthreads).
 *
 * interface_ip is initialised from *interface_ip_ and modified in place before
 * each table is constructed; fields not reassigned here keep the values the
 * caller passed in.
 *
 * Parameters:
 *   XML_interface  parsed configuration, read through XML->sys...
 *   ithCore_       index used for XML->sys.core[...]
 *   interface_ip_  array parameters used to initialise interface_ip
 *   dyn_p_         core parameters stored in coredynp
 *   exist_         when false, nothing is modelled and the constructor
 *                  returns right after the member initialisers
 */
BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 globalBPT(0),
 localBPT(0),
 L1_localBPT(0),
 L2_localBPT(0),
 chooser(0),
 RAS(0),
 exist(exist_)
{
	/*
	 * Branch Predictor, accessed during ID stage.
	 * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264,
	 * including global predictor, local two level predictor, and Chooser.
	 * The Branch predictor also includes a RAS (return address stack) for function calls
	 * Branch predictors are tagged by thread ID and modeled as 1-way associative $
	 * However RAS return address stacks are duplicated for each thread.
	 * TODO:Data Width need to be computed more precisely
	 */
	if (!exist) return;
	int  data;

	clockRate = coredynp.clockRate;
	executionTime = coredynp.executionTime;
	interface_ip.assoc               = 1;
	interface_ip.pure_cam            = false;
	if (coredynp.multithreaded)
	{
		//Thread-ID tag: round the thread-id width UP to whole bits so that a
		//non-power-of-two thread count still gets enough bits. This is the
		//same form the BTB tag computation in InstFetchU uses.
		int tag                          = int(ceil(log2(coredynp.num_hthreads))) + EXTRA_TAG_BITS;
		interface_ip.specific_tag        = 1;
		interface_ip.tag_w               = tag;

		interface_ip.is_cache			 = true;
		interface_ip.pure_ram            = false;
	}
	else
	{
		//Single-threaded: tables are modelled as pure RAMs.
		interface_ip.is_cache			 = false;
		interface_ip.pure_ram            = true;

	}
	//Global predictor: global_predictor_entries entries of global_predictor_bits bits (rounded up to bytes)
	data							 = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0));
	interface_ip.line_sz             = data;
	interface_ip.cache_sz            = data*XML->sys.core[ithCore].predictor.global_predictor_entries;
	interface_ip.nbanks              = 1;
	interface_ip.out_w               = interface_ip.line_sz*8;
	interface_ip.access_mode         = 2;
	interface_ip.throughput          = 1.0/clockRate;
	interface_ip.latency             = 1.0/clockRate;
	interface_ip.obj_func_dyn_energy = 0;
	interface_ip.obj_func_dyn_power  = 0;
	interface_ip.obj_func_leak_power = 0;
	interface_ip.obj_func_cycle_t    = 1;
	interface_ip.num_rw_ports    = 0;
	interface_ip.num_rd_ports    = coredynp.predictionW;
	interface_ip.num_wr_ports    = coredynp.predictionW;
	interface_ip.num_se_rd_ports = 0;
	globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
	globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area);
	area.set_area(area.get_area()+ globalBPT->local_result.area);

	//Local BPT (Level 1): local_predictor_entries entries of local_predictor_size[0] bits
	data							 = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0));
	interface_ip.line_sz             = data;
	interface_ip.cache_sz            = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
	interface_ip.nbanks              = 1;
	interface_ip.out_w               = interface_ip.line_sz*8;
	interface_ip.access_mode         = 2;
	interface_ip.throughput          = 1.0/clockRate;
	interface_ip.latency             = 1.0/clockRate;
	interface_ip.obj_func_dyn_energy = 0;
	interface_ip.obj_func_dyn_power  = 0;
	interface_ip.obj_func_leak_power = 0;
	interface_ip.obj_func_cycle_t    = 1;
	interface_ip.num_rw_ports    = 0;
	interface_ip.num_rd_ports    = coredynp.predictionW;
	interface_ip.num_wr_ports    = coredynp.predictionW;
	interface_ip.num_se_rd_ports = 0;
	L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
	L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area);
	area.set_area(area.get_area()+ L1_localBPT->local_result.area);

	//Local BPT (Level 2): same entry count as level 1, local_predictor_size[1] bits per entry
	data							 = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0));
	interface_ip.line_sz             = data;
	interface_ip.cache_sz            = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
	interface_ip.nbanks              = 1;
	interface_ip.out_w               = interface_ip.line_sz*8;
	interface_ip.access_mode         = 2;
	interface_ip.throughput          = 1.0/clockRate;
	interface_ip.latency             = 1.0/clockRate;
	interface_ip.obj_func_dyn_energy = 0;
	interface_ip.obj_func_dyn_power  = 0;
	interface_ip.obj_func_leak_power = 0;
	interface_ip.obj_func_cycle_t    = 1;
	interface_ip.num_rw_ports    = 0;
	interface_ip.num_rd_ports    = coredynp.predictionW;
	interface_ip.num_wr_ports    = coredynp.predictionW;
	interface_ip.num_se_rd_ports = 0;
	L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
	L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area);
	area.set_area(area.get_area()+ L2_localBPT->local_result.area);

	//Chooser: chooser_predictor_entries entries of chooser_predictor_bits bits
	data							 = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0));
	interface_ip.line_sz             = data;
	interface_ip.cache_sz            = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries;
	interface_ip.nbanks              = 1;
	interface_ip.out_w               = interface_ip.line_sz*8;
	interface_ip.access_mode         = 2;
	interface_ip.throughput          = 1.0/clockRate;
	interface_ip.latency             = 1.0/clockRate;
	interface_ip.obj_func_dyn_energy = 0;
	interface_ip.obj_func_dyn_power  = 0;
	interface_ip.obj_func_leak_power = 0;
	interface_ip.obj_func_cycle_t    = 1;
	interface_ip.num_rw_ports    = 0;
	interface_ip.num_rd_ports    = coredynp.predictionW;
	interface_ip.num_wr_ports    = coredynp.predictionW;
	interface_ip.num_se_rd_ports = 0;
	chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty);
	chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area);
	area.set_area(area.get_area()+ chooser->local_result.area);

	//RAS return address stacks are Duplicated for each thread.
	//Always a pure RAM, regardless of the multithreaded setting above;
	//each of the RAS_size entries holds one PC (pc_width bits, rounded up to bytes).
	interface_ip.is_cache			 = false;
	interface_ip.pure_ram            = true;
	data							 = int(ceil(coredynp.pc_width/8.0));
	interface_ip.line_sz             = data;
	interface_ip.cache_sz            = data*XML->sys.core[ithCore].RAS_size;
	interface_ip.assoc               = 1;
	interface_ip.nbanks              = 1;
	interface_ip.out_w               = interface_ip.line_sz*8;
	interface_ip.access_mode         = 2;
	interface_ip.throughput          = 1.0/clockRate;
	interface_ip.latency             = 1.0/clockRate;
	interface_ip.obj_func_dyn_energy = 0;
	interface_ip.obj_func_dyn_power  = 0;
	interface_ip.obj_func_leak_power = 0;
	interface_ip.obj_func_cycle_t    = 1;
	interface_ip.num_rw_ports    = 0;
	interface_ip.num_rd_ports    = coredynp.predictionW;
	interface_ip.num_wr_ports    = coredynp.predictionW;
	interface_ip.num_se_rd_ports = 0;
	RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty);
	//One modelled stack, area multiplied by the number of hardware threads.
	RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
	area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);

}

SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 int_inst_window(0),
 fp_inst_window(0),
 ROB(0),
 instruction_selection(0),
 exist(exist_)
 {
	if (!exist) return;
	int   tag, data;
	bool  is_default=true;
	string tmp_name;

	clockRate = coredynp.clockRate;
	executionTime = coredynp.executionTime;
	if ((coredynp.core_ty==Inorder && coredynp.multithreaded))
	{
		//Instruction issue queue, in-order multi-issue or multithreaded processor also has this structure. Unified window for Inorder processors
		tag							     = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design
		data							 = XML->sys.core[ithCore].instruction_length;
		//NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and IA-32 Architectures
		//Software Developer’s Manual
		interface_ip.is_cache			 = true;
		interface_ip.pure_cam            = false;
		interface_ip.pure_ram            = false;
		interface_ip.line_sz             = int(ceil(data/8.0));
		interface_ip.specific_tag        = 1;
		interface_ip.tag_w               = tag;
		interface_ip.cache_sz            = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64;
		interface_ip.assoc               = 0;
		interface_ip.nbanks              = 1;
		interface_ip.out_w               = interface_ip.line_sz*8;
		interface_ip.access_mode         = 1;
		interface_ip.throughput          = 1.0/clockRate;
		interface_ip.latency             = 1.0/clockRate;
		interface_ip.obj_func_dyn_energy = 0;
		interface_ip.obj_func_dyn_power  = 0;
		interface_ip.obj_func_leak_power = 0;
		interface_ip.obj_func_cycle_t    = 1;
		interface_ip.num_rw_ports        = 0;
		interface_ip.num_rd_ports        = coredynp.peak_issueW;
		interface_ip.num_wr_ports        = coredynp.peak_issueW;
		interface_ip.num_se_rd_ports     = 0;
		interface_ip.num_search_ports    = coredynp.peak_issueW;
		int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
		int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
		area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
		//output_data_csv(iRS.RS.local_result);
		Iw_height      =int_inst_window->local_result.cache_ht;

		/*
		 * selection logic
		 * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up
		 * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who
		 * at the issue stage.
		 */

		instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
				coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads,
				&interface_ip, Core_device, coredynp.core_ty);
	}

    if (coredynp.core_ty==OOO)
    {
    	/*
    	 * CAM based instruction window
    	 * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored
    	 * For RS based OOO it is the Reservation station, where both tags and values of phy regs are stored
    	 * It is written once and read twice(two operands) before an instruction can be issued.
    	 * X86 instruction can be very long up to 15B. add instruction length in XML
    	 */
    	if(coredynp.scheu_ty==PhysicalRegFile)
    	{
    		tag	 = coredynp.phy_ireg_width;
    		// Each time only half of the tag is compared, but two tag should be stored.
    		// This underestimate the search power
    		data = int((ceil((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0));
	        //Data width being divided by 2 means only after both operands available the whole data will be read out.
	        //This is modeled using two equivalent readouts with half of the data width
    		tmp_name = "InstIssueQueue";
    	}
    	else
    	{
	        tag	  = coredynp.phy_ireg_width;
    		// Each time only half of the tag is compared, but two tag should be stored.
    		// This underestimate the search power
	        data  = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+
	        		2*coredynp.int_data_width)/2.0)/8.0));
	        //Data width being divided by 2 means only after both operands available the whole data will be read out.
	        //This is modeled using two equivalent readouts with half of the data width

	        tmp_name = "IntReservationStation";
    	}
    	interface_ip.is_cache			 = true;
    	interface_ip.pure_cam            = false;
    	interface_ip.pure_ram            = false;
    	interface_ip.line_sz             = data;
    	interface_ip.cache_sz            = data*XML->sys.core[ithCore].instruction_window_size;
    	interface_ip.assoc               = 0;
    	interface_ip.nbanks              = 1;
    	interface_ip.out_w               = interface_ip.line_sz*8;
    	interface_ip.specific_tag        = 1;
    	interface_ip.tag_w               = tag;
    	interface_ip.access_mode         = 0;
    	interface_ip.throughput          = 2*1.0/clockRate;
    	interface_ip.latency             = 2*1.0/clockRate;
    	interface_ip.obj_func_dyn_energy = 0;
    	interface_ip.obj_func_dyn_power  = 0;
    	interface_ip.obj_func_leak_power = 0;
    	interface_ip.obj_func_cycle_t    = 1;
    	interface_ip.num_rw_ports       = 0;
    	interface_ip.num_rd_ports       = coredynp.peak_issueW;
    	interface_ip.num_wr_ports       = coredynp.peak_issueW;
    	interface_ip.num_se_rd_ports    = 0;
		interface_ip.num_search_ports   = coredynp.peak_issueW;
		int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
		int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
		area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
		Iw_height      =int_inst_window->local_result.cache_ht;
		//FU inst window
    	if(coredynp.scheu_ty==PhysicalRegFile)
    	{
    		tag	 = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared
    		data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0));
    		tmp_name = "FPIssueQueue";
    	}
    	else
    	{
	        tag	  = 2*coredynp.phy_ireg_width;
	        data  = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+
	        		2*coredynp.fp_data_width)/8.0));
	        tmp_name = "FPReservationStation";
    	}
    	interface_ip.is_cache			 = true;
    	interface_ip.pure_cam            = false;
    	interface_ip.pure_ram            = false;
    	interface_ip.line_sz             = data;
    	interface_ip.cache_sz            = data*XML->sys.core[ithCore].fp_instruction_window_size;
    	interface_ip.assoc               = 0;
    	interface_ip.nbanks              = 1;
    	interface_ip.out_w               = interface_ip.line_sz*8;
    	interface_ip.specific_tag        = 1;
    	interface_ip.tag_w               = tag;
    	interface_ip.access_mode         = 0;
    	interface_ip.throughput          = 1.0/clockRate;
    	interface_ip.latency             = 1.0/clockRate;
    	interface_ip.obj_func_dyn_energy = 0;
    	interface_ip.obj_func_dyn_power  = 0;
    	interface_ip.obj_func_leak_power = 0;
    	interface_ip.obj_func_cycle_t    = 1;
    	interface_ip.num_rw_ports       = 0;
    	interface_ip.num_rd_ports       = coredynp.fp_issueW;
    	interface_ip.num_wr_ports       = coredynp.fp_issueW;
    	interface_ip.num_se_rd_ports    = 0;
		interface_ip.num_search_ports   = coredynp.fp_issueW;
		fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
		fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
		area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
		fp_Iw_height      =fp_inst_window->local_result.cache_ht;

		if (XML->sys.core[ithCore].ROB_size >0)
		{
			/*
			 *  if ROB_size = 0, then the target processor does not support hardware-based
			 *  speculation, i.e. , the processor allow OOO issue as well as OOO completion, which
			 *  means branch must be resolved before instruction issued into instruction window, since
			 *  there is no change to flush miss-predict branch path after instructions are issued in this situation.
			 *
			 *  ROB.ROB size = inflight inst. ROB is unified for int and fp inst.
			 *  One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7.
			 *  However, this approach is abandoned due to its high power and poor scalablility.
			 *	McPAT uses current implementation of ROB as circular buffer.
			 *	ROB is written once when instruction is issued and read once when the instruction is committed.         *
			 */
			int robExtra = int(ceil(5 + log2(coredynp.num_hthreads)));
			//5 bits are: busy, Issued, Finished, speculative, valid
			if(coredynp.scheu_ty==PhysicalRegFile)
			{
				//PC is to id the instruction for recover exception.
				//inst is used to map the renamed dest. registers.so that commit stage can know which reg/RRAT to update
//				data = int(ceil((robExtra+coredynp.pc_width +
//						coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0));
				data = int(ceil((robExtra+coredynp.pc_width +
							coredynp.phy_ireg_width)/8.0));
			}
			else
			{
				//in RS based OOO, ROB also contains value of destination reg
//				data  = int(ceil((robExtra+coredynp.pc_width +
//						coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
				data  = int(ceil((robExtra + coredynp.pc_width +
						coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
			}
			interface_ip.is_cache			 = false;
			interface_ip.pure_cam            = false;
			interface_ip.pure_ram            = true;
			interface_ip.line_sz             = data;
			interface_ip.cache_sz            = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads
			interface_ip.assoc               = 1;
			interface_ip.nbanks              = 1;
			interface_ip.out_w               = interface_ip.line_sz*8;
			interface_ip.access_mode         = 1;
			interface_ip.throughput          = 1.0/clockRate;
			interface_ip.latency             = 1.0/clockRate;
			interface_ip.obj_func_dyn_energy = 0;
			interface_ip.obj_func_dyn_power  = 0;
			interface_ip.obj_func_leak_power = 0;
			interface_ip.obj_func_cycle_t    = 1;
			interface_ip.num_rw_ports       = 0;
			interface_ip.num_rd_ports       = coredynp.peak_commitW;
			interface_ip.num_wr_ports       = coredynp.peak_issueW;
			interface_ip.num_se_rd_ports    = 0;
			interface_ip.num_search_ports   = 0;
			ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
			ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
			area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
			ROB_height      =ROB->local_result.cache_ht;
		}

		instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
				coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty);
    }
}

LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 LSQ(0),
 exist(exist_)
{
	  if (!exist) return;
	  int  idx, tag, data, size, line, assoc;
	  bool debug= false;
	  int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16;

	  clockRate = coredynp.clockRate;
	  executionTime = coredynp.executionTime;
	  cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7];

	  interface_ip.num_search_ports    = XML->sys.core[ithCore].memory_ports;
	  interface_ip.is_cache			   = true;
	  interface_ip.pure_cam            = false;
	  interface_ip.pure_ram            = false;
	 

	  //Crossbar based interconnect for shared memory accesses, added by Syed
	  //Crossbar

	  if(XML->sys.architecture==1){
    xbar_shared     = new Crossbar(coredynp.num_fpus,coredynp.num_fpus,32,&(g_tp.peri_global));//Syed: coredynp.num_fpus is used as simd_width  
	  }
	  else{
	  xbar_shared     = new Crossbar(coredynp.num_fpus,coredynp.num_fpus,32,&(g_tp.peri_global));//Syed: coredynp.num_fpus is used as simd_width
	  }


     //TODO: Check if this line should be changed to
     //new Crossbar(simd_width,shared_memory_banks,word_length*simd_width,&(g_tp.peri_global));

    //shared memory added by Jingwen
	  size                             = (int)XML->sys.core[ithCore].sharedmemory.dcache_config[0];
	  line                             = (int)XML->sys.core[ithCore].sharedmemory.dcache_config[1];
	  assoc                            = (int)XML->sys.core[ithCore].sharedmemory.dcache_config[2];
	  idx    					 	   = debug?9:int(ceil(log2(size/line/assoc)));
	  tag							   = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = 1;
	  interface_ip.cache_sz            = debug?32768:(int)XML->sys.core[ithCore].sharedmemory.dcache_config[0];
	  interface_ip.line_sz             = debug?64:(int)XML->sys.core[ithCore].sharedmemory.dcache_config[1];
	  interface_ip.assoc               = debug?8:(int)XML->sys.core[ithCore].sharedmemory.dcache_config[2];
	  interface_ip.nbanks              = debug?1:(int)XML->sys.core[ithCore].sharedmemory.dcache_config[3];
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 0;//debug?0:XML->sys.core[ithCore].sharedmemory.dcache_config[5];
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].sharedmemory.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?3.0/clockRate:XML->sys.core[ithCore].sharedmemory.dcache_config[5]/clockRate;
	  interface_ip.is_cache			 = true;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least.
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  sharedmemory.caches = new ArrayST(&interface_ip, "sharedmemory", Core_device, coredynp.opt_local, coredynp.core_ty);
	  sharedmemory.area.set_area(sharedmemory.area.get_area()+ sharedmemory.caches->local_result.area);
	  area.set_area(area.get_area()+ sharedmemory.caches->local_result.area + xbar_shared->area.get_area());


    //shared memory buffer
	  //miss buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
	  data							   = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + sharedmemory.caches->l_ip.line_sz*8;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = 1;
	  interface_ip.line_sz             = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
	  interface_ip.cache_sz            = XML->sys.core[ithCore].sharedmemory.buffer_sizes[0]*interface_ip.line_sz;
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].sharedmemory.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].sharedmemory.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  sharedmemory.missb = new ArrayST(&interface_ip, "SharedmemoryMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  sharedmemory.area.set_area(sharedmemory.area.get_area()+ sharedmemory.missb->local_result.area);
	  area.set_area(area.get_area()+ sharedmemory.missb->local_result.area);


    //sharedmemory fill buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
	  data							   = sharedmemory.caches->l_ip.line_sz;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = 1;
	  interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
	  interface_ip.cache_sz            = data*XML->sys.core[ithCore].sharedmemory.buffer_sizes[1];
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].sharedmemory.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].sharedmemory.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  sharedmemory.ifb = new ArrayST(&interface_ip, "SharedMemoryFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  sharedmemory.area.set_area(sharedmemory.area.get_area()+ sharedmemory.ifb->local_result.area);
	  area.set_area(area.get_area()+ sharedmemory.ifb->local_result.area);

    //sharedmemory prefetch buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
	  data							   = sharedmemory.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = 1;
	  interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
	  interface_ip.cache_sz            = XML->sys.core[ithCore].sharedmemory.buffer_sizes[2]*interface_ip.line_sz;
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].sharedmemory.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].sharedmemory.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  sharedmemory.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  sharedmemory.area.set_area(sharedmemory.area.get_area()+ sharedmemory.prefetchb->local_result.area);
	  area.set_area(area.get_area()+ sharedmemory.prefetchb->local_result.area);

    //shared memory WBB
	  if (cache_p==Write_back)
	  {
		  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
		  data							   = sharedmemory.caches->l_ip.line_sz;
		  interface_ip.specific_tag        = 1;
		  interface_ip.tag_w               = 1;
		  interface_ip.line_sz             = data;
		  interface_ip.cache_sz            = XML->sys.core[ithCore].sharedmemory.buffer_sizes[3]*interface_ip.line_sz;
		  interface_ip.assoc               = 0;
		  interface_ip.nbanks              = 1;
		  interface_ip.out_w               = interface_ip.line_sz*8;
		  interface_ip.access_mode         = 2;
		  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].sharedmemory.dcache_config[4]/clockRate;
		  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].sharedmemory.dcache_config[5]/clockRate;
		  interface_ip.obj_func_dyn_energy = 0;
		  interface_ip.obj_func_dyn_power  = 0;
		  interface_ip.obj_func_leak_power = 0;
		  interface_ip.obj_func_cycle_t    = 1;
		  interface_ip.num_rw_ports    = XML->sys.core[ithCore].memory_ports;
		  interface_ip.num_rd_ports    = 0;
		  interface_ip.num_wr_ports    = 0;
		  interface_ip.num_se_rd_ports = 0;
		  sharedmemory.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty);
		  sharedmemory.area.set_area(sharedmemory.area.get_area()+ sharedmemory.wbb->local_result.area);
		  area.set_area(area.get_area()+ sharedmemory.wbb->local_result.area);
		  //output_data_csv(sharedmemory.wbb.local_result);
	  }


   /*
    * ccache starts here
    */   
    //Constant cache
	  size                             = (int)XML->sys.core[ithCore].ccache.dcache_config[0];
	  line                             = (int)XML->sys.core[ithCore].ccache.dcache_config[1];
	  assoc                            = (int)XML->sys.core[ithCore].ccache.dcache_config[2];
	  idx    					 	   = debug?9:int(ceil(log2(size/line/assoc)));
	  tag							   = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.cache_sz            = debug?32768:(int)XML->sys.core[ithCore].ccache.dcache_config[0];
	  interface_ip.line_sz             = debug?64:(int)XML->sys.core[ithCore].ccache.dcache_config[1];
	  interface_ip.assoc               = debug?8:(int)XML->sys.core[ithCore].ccache.dcache_config[2];
	  interface_ip.nbanks              = debug?1:(int)XML->sys.core[ithCore].ccache.dcache_config[3];
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 0;//debug?0:XML->sys.core[ithCore].ccache.dcache_config[5];
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].ccache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?3.0/clockRate:XML->sys.core[ithCore].ccache.dcache_config[5]/clockRate;
	  interface_ip.is_cache			 = true;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least.
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  ccache.caches = new ArrayST(&interface_ip, "ccache", Core_device, coredynp.opt_local, coredynp.core_ty);
	  ccache.area.set_area(ccache.area.get_area()+ ccache.caches->local_result.area);
	  area.set_area(area.get_area()+ ccache.caches->local_result.area);
	  //output_data_csv(ccache.caches.local_result);



      //cCache controllers
	  //miss buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
	  data							   = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + ccache.caches->l_ip.line_sz*8;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
	  interface_ip.cache_sz            = XML->sys.core[ithCore].ccache.buffer_sizes[0]*interface_ip.line_sz;
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].ccache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].ccache.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  ccache.missb = new ArrayST(&interface_ip, "ccacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  ccache.area.set_area(ccache.area.get_area()+ ccache.missb->local_result.area);
	  area.set_area(area.get_area()+ ccache.missb->local_result.area);
	  //output_data_csv(ccache.missb.local_result);

	  //fill buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
	  data							   = ccache.caches->l_ip.line_sz;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
	  interface_ip.cache_sz            = data*XML->sys.core[ithCore].ccache.buffer_sizes[1];
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].ccache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].ccache.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  ccache.ifb = new ArrayST(&interface_ip, "ccacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  ccache.area.set_area(ccache.area.get_area()+ ccache.ifb->local_result.area);
	  area.set_area(area.get_area()+ ccache.ifb->local_result.area);
	  //output_data_csv(ccache.ifb.local_result);

	  //prefetch buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
	  data							   = ccache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
	  interface_ip.cache_sz            = XML->sys.core[ithCore].ccache.buffer_sizes[2]*interface_ip.line_sz;
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].ccache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].ccache.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  ccache.prefetchb = new ArrayST(&interface_ip, "ccacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  ccache.area.set_area(ccache.area.get_area()+ ccache.prefetchb->local_result.area);
	  area.set_area(area.get_area()+ ccache.prefetchb->local_result.area);
	  //output_data_csv(ccache.prefetchb.local_result);

	  //WBB
	  if (cache_p==Write_back)
	  {
		  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
		  data							   = ccache.caches->l_ip.line_sz;
		  interface_ip.specific_tag        = 1;
		  interface_ip.tag_w               = tag;
		  interface_ip.line_sz             = data;
		  interface_ip.cache_sz            = XML->sys.core[ithCore].ccache.buffer_sizes[3]*interface_ip.line_sz;
		  interface_ip.assoc               = 0;
		  interface_ip.nbanks              = 1;
		  interface_ip.out_w               = interface_ip.line_sz*8;
		  interface_ip.access_mode         = 2;
		  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].ccache.dcache_config[4]/clockRate;
		  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].ccache.dcache_config[5]/clockRate;
		  interface_ip.obj_func_dyn_energy = 0;
		  interface_ip.obj_func_dyn_power  = 0;
		  interface_ip.obj_func_leak_power = 0;
		  interface_ip.obj_func_cycle_t    = 1;
		  interface_ip.num_rw_ports    = XML->sys.core[ithCore].memory_ports;
		  interface_ip.num_rd_ports    = 0;
		  interface_ip.num_wr_ports    = 0;
		  interface_ip.num_se_rd_ports = 0;
		  ccache.wbb = new ArrayST(&interface_ip, "ccacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty);
		  ccache.area.set_area(ccache.area.get_area()+ ccache.wbb->local_result.area);
		  area.set_area(area.get_area()+ ccache.wbb->local_result.area);
		  //output_data_csv(ccache.wbb.local_result);
	  }

   /*
    * tcache starts here
    */   
    //Texture cache
	  size                             = (int)XML->sys.core[ithCore].tcache.dcache_config[0];
	  line                             = (int)XML->sys.core[ithCore].tcache.dcache_config[1];
	  assoc                            = (int)XML->sys.core[ithCore].tcache.dcache_config[2];
	  idx    					 	   = debug?9:int(ceil(log2(size/line/assoc)));
	  tag							   = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.cache_sz            = debug?32768:(int)XML->sys.core[ithCore].tcache.dcache_config[0];
	  interface_ip.line_sz             = debug?64:(int)XML->sys.core[ithCore].tcache.dcache_config[1];
	  interface_ip.assoc               = debug?8:(int)XML->sys.core[ithCore].tcache.dcache_config[2];
	  interface_ip.nbanks              = debug?1:(int)XML->sys.core[ithCore].tcache.dcache_config[3];
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 0;//debug?0:XML->sys.core[ithCore].tcache.dcache_config[5];
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].tcache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?3.0/clockRate:XML->sys.core[ithCore].tcache.dcache_config[5]/clockRate;
	  interface_ip.is_cache			 = true;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least.
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  tcache.caches = new ArrayST(&interface_ip, "tcache", Core_device, coredynp.opt_local, coredynp.core_ty);
	  tcache.area.set_area(tcache.area.get_area()+ tcache.caches->local_result.area);
	  area.set_area(area.get_area()+ tcache.caches->local_result.area);
	  //output_data_csv(tcache.caches.local_result);


	  //tCache controllers
	  //miss buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
	  data							   = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + tcache.caches->l_ip.line_sz*8;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
	  interface_ip.cache_sz            = XML->sys.core[ithCore].tcache.buffer_sizes[0]*interface_ip.line_sz;
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].tcache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].tcache.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  tcache.missb = new ArrayST(&interface_ip, "tcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  tcache.area.set_area(tcache.area.get_area()+ tcache.missb->local_result.area);
	  area.set_area(area.get_area()+ tcache.missb->local_result.area);
	  //output_data_csv(tcache.missb.local_result);

	  //fill buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
	  data							   = tcache.caches->l_ip.line_sz;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
	  interface_ip.cache_sz            = data*XML->sys.core[ithCore].tcache.buffer_sizes[1];
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].tcache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].tcache.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  tcache.ifb = new ArrayST(&interface_ip, "tcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  tcache.area.set_area(tcache.area.get_area()+ tcache.ifb->local_result.area);
	  area.set_area(area.get_area()+ tcache.ifb->local_result.area);
	  //output_data_csv(tcache.ifb.local_result);

	  //prefetch buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
	  data							   = tcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
	  interface_ip.cache_sz            = XML->sys.core[ithCore].tcache.buffer_sizes[2]*interface_ip.line_sz;
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].tcache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].tcache.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  tcache.prefetchb = new ArrayST(&interface_ip, "tcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  tcache.area.set_area(tcache.area.get_area()+ tcache.prefetchb->local_result.area);
	  area.set_area(area.get_area()+ tcache.prefetchb->local_result.area);
	  //output_data_csv(tcache.prefetchb.local_result);

	  //WBB
	  if (cache_p==Write_back)
	  {
		  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
		  data							   = tcache.caches->l_ip.line_sz;
		  interface_ip.specific_tag        = 1;
		  interface_ip.tag_w               = tag;
		  interface_ip.line_sz             = data;
		  interface_ip.cache_sz            = XML->sys.core[ithCore].tcache.buffer_sizes[3]*interface_ip.line_sz;
		  interface_ip.assoc               = 0;
		  interface_ip.nbanks              = 1;
		  interface_ip.out_w               = interface_ip.line_sz*8;
		  interface_ip.access_mode         = 2;
		  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].tcache.dcache_config[4]/clockRate;
		  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].tcache.dcache_config[5]/clockRate;
		  interface_ip.obj_func_dyn_energy = 0;
		  interface_ip.obj_func_dyn_power  = 0;
		  interface_ip.obj_func_leak_power = 0;
		  interface_ip.obj_func_cycle_t    = 1;
		  interface_ip.num_rw_ports    = XML->sys.core[ithCore].memory_ports;
		  interface_ip.num_rd_ports    = 0;
		  interface_ip.num_wr_ports    = 0;
		  interface_ip.num_se_rd_ports = 0;
		  tcache.wbb = new ArrayST(&interface_ip, "tcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty);
		  tcache.area.set_area(tcache.area.get_area()+ tcache.wbb->local_result.area);
		  area.set_area(area.get_area()+ tcache.wbb->local_result.area);
		  //output_data_csv(tcache.wbb.local_result);
	  }




   /*
    * dcache starts here
    */   
    //Dcache
	  size                             = (int)XML->sys.core[ithCore].dcache.dcache_config[0];
	  line                             = (int)XML->sys.core[ithCore].dcache.dcache_config[1];
	  assoc                            = (int)XML->sys.core[ithCore].dcache.dcache_config[2];
	  idx    					 	   = debug?9:int(ceil(log2(size/line/assoc)));
	  tag							   = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.cache_sz            = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0];
	  interface_ip.line_sz             = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1];
	  interface_ip.assoc               = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2];
	  interface_ip.nbanks              = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3];
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
	  interface_ip.is_cache			 = true;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least.
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty);
	  dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area);
	  area.set_area(area.get_area()+ dcache.caches->local_result.area);
	  //output_data_csv(dcache.caches.local_result);


	  //dCache controllers
	  //miss buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
	  data							   = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
	  interface_ip.cache_sz            = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz;
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area);
	  area.set_area(area.get_area()+ dcache.missb->local_result.area);
	  //output_data_csv(dcache.missb.local_result);

	  //fill buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
	  data							   = dcache.caches->l_ip.line_sz;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
	  interface_ip.cache_sz            = data*XML->sys.core[ithCore].dcache.buffer_sizes[1];
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area);
	  area.set_area(area.get_area()+ dcache.ifb->local_result.area);
	  //output_data_csv(dcache.ifb.local_result);

	  //prefetch buffer
	  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
	  data							   = dcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
	  interface_ip.cache_sz            = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz;
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 2;
	  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
	  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
	  interface_ip.num_rd_ports    = 0;
	  interface_ip.num_wr_ports    = 0;
	  interface_ip.num_se_rd_ports = 0;
	  dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
	  dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area);
	  area.set_area(area.get_area()+ dcache.prefetchb->local_result.area);
	  //output_data_csv(dcache.prefetchb.local_result);

	  //WBB
	  if (cache_p==Write_back)
	  {
		  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
		  data							   = dcache.caches->l_ip.line_sz;
		  interface_ip.specific_tag        = 1;
		  interface_ip.tag_w               = tag;
		  interface_ip.line_sz             = data;
		  interface_ip.cache_sz            = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz;
		  interface_ip.assoc               = 0;
		  interface_ip.nbanks              = 1;
		  interface_ip.out_w               = interface_ip.line_sz*8;
		  interface_ip.access_mode         = 2;
		  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
		  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
		  interface_ip.obj_func_dyn_energy = 0;
		  interface_ip.obj_func_dyn_power  = 0;
		  interface_ip.obj_func_leak_power = 0;
		  interface_ip.obj_func_cycle_t    = 1;
		  interface_ip.num_rw_ports    = XML->sys.core[ithCore].memory_ports;
		  interface_ip.num_rd_ports    = 0;
		  interface_ip.num_wr_ports    = 0;
		  interface_ip.num_se_rd_ports = 0;
		  dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty);
		  dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area);
		  area.set_area(area.get_area()+ dcache.wbb->local_result.area);
		  //output_data_csv(dcache.wbb.local_result);
	  }

	  /*
	   * LSU--in-order processors do not have separate load queue: unified lsq
	   * partitioned among threads
	   * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
	   */
	  tag							   = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS;
	  data							   = XML->sys.machine_bits;
	  interface_ip.is_cache			   = true;
	  interface_ip.line_sz             = int(ceil(data/32.0))*4;
	  interface_ip.specific_tag        = 1;
	  interface_ip.tag_w               = tag;
	  interface_ip.cache_sz            = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
	  interface_ip.assoc               = 0;
	  interface_ip.nbanks              = 1;
	  interface_ip.out_w               = interface_ip.line_sz*8;
	  interface_ip.access_mode         = 1;
	  interface_ip.throughput          = 1.0/clockRate;
	  interface_ip.latency             = 1.0/clockRate;
	  interface_ip.obj_func_dyn_energy = 0;
	  interface_ip.obj_func_dyn_power  = 0;
	  interface_ip.obj_func_leak_power = 0;
	  interface_ip.obj_func_cycle_t    = 1;
	  interface_ip.num_rw_ports        = 0;
	  interface_ip.num_rd_ports        = XML->sys.core[ithCore].memory_ports;
	  interface_ip.num_wr_ports        = XML->sys.core[ithCore].memory_ports;
	  interface_ip.num_se_rd_ports     = 0;
	  interface_ip.num_search_ports    =XML->sys.core[ithCore].memory_ports;
	  LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty);
	  LSQ->area.set_area(LSQ->area.get_area()+ LSQ->local_result.area);
	  area.set_area(area.get_area()+ LSQ->local_result.area);
	  area.set_area(area.get_area()*cdb_overhead);
	  //output_data_csv(LSQ.LSQ.local_result);
	  lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/

	  if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
	  {
		  interface_ip.line_sz             = int(ceil(data/32.0))*4;
		  interface_ip.specific_tag        = 1;
		  interface_ip.tag_w               = tag;
		  interface_ip.cache_sz            = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
		  interface_ip.assoc               = 0;
		  interface_ip.nbanks              = 1;
		  interface_ip.out_w               = interface_ip.line_sz*8;
		  interface_ip.access_mode         = 1;
		  interface_ip.throughput          = 1.0/clockRate;
		  interface_ip.latency             = 1.0/clockRate;
		  interface_ip.obj_func_dyn_energy = 0;
		  interface_ip.obj_func_dyn_power  = 0;
		  interface_ip.obj_func_leak_power = 0;
		  interface_ip.obj_func_cycle_t    = 1;
		  interface_ip.num_rw_ports        = 0;
		  interface_ip.num_rd_ports        = XML->sys.core[ithCore].memory_ports;
		  interface_ip.num_wr_ports        = XML->sys.core[ithCore].memory_ports;
		  interface_ip.num_se_rd_ports     = 0;
		  interface_ip.num_search_ports    =XML->sys.core[ithCore].memory_ports;
		  LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
		  LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area);
		  area.set_area(area.get_area()+ LoadQ->local_result.area);
		  area.set_area(area.get_area()*cdb_overhead);
		  //output_data_csv(LoadQ.LoadQ.local_result);
		  lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
	  }

}

/*
 * Memory management unit of one core: builds the instruction TLB and the data
 * TLB as ArrayST arrays and adds both array areas to this component's area.
 *
 *   XML_interface  parsed XML configuration; values are read from sys and sys.core[ithCore_]
 *   ithCore_       index of the core being modelled
 *   interface_ip_  array parameter set; copied into interface_ip before any field is changed
 *   dyn_p_         core parameters; clockRate, executionTime, opt_local and core_ty are used here
 *   exist_         when false the constructor returns right after the initializer list
 */
MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 itlb(0),
 dtlb(0),
 exist(exist_)
{
	if (!exist)
	{
		return;
	}

	// Debug switch: when true, TLB timing is forced to one cycle and the ITLB to a single port.
	const bool forceSimple = false;

	clockRate     = coredynp.clockRate;
	executionTime = coredynp.executionTime;

	// ITLB and DTLB use the same tag and entry widths, so they are derived once.
	// TLBs are partitioned among threads according to Niagara and Nehalem.
	const int pageOffsetBits = int(floor(log2(XML->sys.virtual_memory_page_size)));
	const int threadBits     = int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)));
	const int tagBits        = XML->sys.virtual_address_width - pageOffsetBits + threadBits + EXTRA_TAG_BITS;
	const int entryBits      = XML->sys.physical_address_width - pageOffsetBits;

	//---------------------------------- ITLB ----------------------------------
	interface_ip.is_cache            = true;
	interface_ip.pure_cam            = false;
	interface_ip.pure_ram            = false;
	interface_ip.specific_tag        = 1;
	interface_ip.tag_w               = tagBits;
	interface_ip.assoc               = 0;
	interface_ip.nbanks              = 1;
	interface_ip.access_mode         = 0;
	// One entry is the translated address bits rounded up to whole bytes.
	interface_ip.line_sz             = int(ceil(entryBits/8.0));
	interface_ip.cache_sz            = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;
	interface_ip.out_w               = interface_ip.line_sz*8;

	// Timing and port count follow the instruction-cache configuration.
	if (forceSimple)
	{
		interface_ip.throughput       = 1.0/clockRate;
		interface_ip.latency          = 1.0/clockRate;
		interface_ip.num_wr_ports     = 1;
		interface_ip.num_search_ports = 1;
	}
	else
	{
		interface_ip.throughput       = XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
		interface_ip.latency          = XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
		interface_ip.num_wr_ports     = XML->sys.core[ithCore].number_instruction_fetch_ports;
		interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
	}
	interface_ip.num_rw_ports        = 0;
	interface_ip.num_rd_ports        = 0;
	interface_ip.num_se_rd_ports     = 0;

	interface_ip.obj_func_dyn_energy = 0;
	interface_ip.obj_func_dyn_power  = 0;
	interface_ip.obj_func_leak_power = 0;
	interface_ip.obj_func_cycle_t    = 1;

	itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty);
	itlb->area.set_area(itlb->area.get_area() + itlb->local_result.area);
	area.set_area(area.get_area() + itlb->local_result.area);

	//---------------------------------- DTLB ----------------------------------
	// Every field is written again so the DTLB does not depend on what the
	// ITLB section left behind; only entry count, timing and ports differ.
	interface_ip.specific_tag        = 1;
	interface_ip.tag_w               = tagBits;
	interface_ip.assoc               = 0;
	interface_ip.nbanks              = 1;
	interface_ip.access_mode         = 0;
	interface_ip.line_sz             = int(ceil(entryBits/8.0));
	interface_ip.cache_sz            = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;
	interface_ip.out_w               = interface_ip.line_sz*8;

	// Timing follows the data-cache configuration.
	if (forceSimple)
	{
		interface_ip.throughput      = 1.0/clockRate;
		interface_ip.latency         = 1.0/clockRate;
	}
	else
	{
		interface_ip.throughput      = XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
		interface_ip.latency         = XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
	}
	// One write port and one search port per memory port (not affected by the debug switch).
	interface_ip.num_rw_ports        = 0;
	interface_ip.num_rd_ports        = 0;
	interface_ip.num_wr_ports        = XML->sys.core[ithCore].memory_ports;
	interface_ip.num_se_rd_ports     = 0;
	interface_ip.num_search_ports    = XML->sys.core[ithCore].memory_ports;

	interface_ip.obj_func_dyn_energy = 0;
	interface_ip.obj_func_dyn_power  = 0;
	interface_ip.obj_func_leak_power = 0;
	interface_ip.obj_func_cycle_t    = 1;

	dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty);
	dtlb->area.set_area(dtlb->area.get_area() + dtlb->local_result.area);
	area.set_area(area.get_area() + dtlb->local_result.area);
}
//#define FERMI

/*
 * Register-file unit of one core (GPGPU-Sim variant): builds the operand
 * crossbar and arbiter, the banked integer register file, the operand
 * collectors, a floating-point register file that is created but not added to
 * the area, and optionally the register-window array.
 *
 *   XML_interface  parsed XML configuration; values are read from sys and sys.core[ithCore_]
 *   ithCore_       index of the core being modelled
 *   interface_ip_  array parameter set; copied into interface_ip before any field is changed
 *   dyn_p_         core parameters (data widths, entry counts, pipeline counts, ...)
 *   exClockRate    clock rate used for every array built here (coredynp.clockRate is not used)
 *   exist_         when false nothing is built; all component pointers stay null
 *
 * Processors have separate architectural register files for each thread,
 * therefore the bypass buses need to travel across all the register files.
 */
RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,double exClockRate,bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 IRF (0),
 FRF (0),
 RFWIN (0),
 exist(exist_)
 {
	// These three are not in the initializer list; null them before the early
	// return so a disabled unit never carries indeterminate pointers.
	OPC         = 0;
	xbar_rfu    = 0;
	arbiter_rfu = 0;

	if (!exist) return;

	clockRate     = exClockRate;
	executionTime = coredynp.executionTime;

	/*********************************************************************************
	 * OC stage modelling (Syed Gilani)
	 *********************************************************************************/

	// Crossbar between register-file banks and collector units, 128 bits wide.
	// When sys.architecture == 1 both port counts are halved.
	if (XML->sys.architecture==1)
	{
		xbar_rfu = new Crossbar(XML->sys.core[ithCore].rf_banks/2, XML->sys.core[ithCore].collector_units/2,
		                        (128), &(g_tp.peri_global));
	}
	else
	{
		xbar_rfu = new Crossbar(XML->sys.core[ithCore].rf_banks, XML->sys.core[ithCore].collector_units,
		                        (128), &(g_tp.peri_global));
	}

	// Arbiter (always sized with the full bank / collector counts).
	arbiter_rfu = new MCPAT_Arbiter(XML->sys.core[ithCore].rf_banks, XML->sys.core[ithCore].collector_units, 1, &(g_tp.peri_global));

	//**********************************IRF***************************************
	// RF banks modelled here for GPGPU-Sim (Syed Gilani).
	// The line size is a fixed 16 bytes; the disabled code derived it from
	// the data width and simd_width instead.
	interface_ip.is_cache            = false;
	interface_ip.pure_cam            = false;
	interface_ip.pure_ram            = true;
	interface_ip.line_sz             = 16;
	interface_ip.cache_sz            = coredynp.num_IRF_entry*4;
	interface_ip.assoc               = 1;
	interface_ip.nbanks              = XML->sys.core[ithCore].rf_banks;
	interface_ip.out_w               = interface_ip.line_sz*8;
	interface_ip.access_mode         = 1;
	interface_ip.throughput          = 1/(clockRate);
	interface_ip.latency             = 8.0/(clockRate);
	// Unlike the FRF and register-window arrays below, this array sets the
	// leakage-power objective flag instead of the cycle-time one.
	interface_ip.obj_func_dyn_energy = 0;
	interface_ip.obj_func_dyn_power  = 0;
	interface_ip.obj_func_leak_power = 1;
	interface_ip.obj_func_cycle_t    = 0;
	interface_ip.num_rw_ports        = 0;//this is the transfer port for saving/restoring states when exceptions happen.
	interface_ip.num_rd_ports        = 1;//2*coredynp.peak_issueW;
	interface_ip.num_wr_ports        = 1;//coredynp.peak_issueW;
	interface_ip.num_se_rd_ports     = 0;
	IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty);

	// The IRF's own area is scaled by pipeline count and cdb_overhead; the unit
	// total receives the unscaled array area plus crossbar and arbiter.
	IRF->area.set_area(IRF->area.get_area() + IRF->local_result.area*coredynp.num_pipelines*cdb_overhead);
	area.set_area(area.get_area() + IRF->local_result.area
		 + xbar_rfu->area.get_area() + arbiter_rfu->area.get_area());

	// Architecture-dependent scaling of the IRF dynamic read/write energy.
	if (XML->sys.architecture==1)
	{
		IRF->local_result.power.readOp.dynamic  *= .33;
		IRF->local_result.power.writeOp.dynamic *= .33;
	}
	else
	{
		IRF->local_result.power.readOp.dynamic  *= .55;
		IRF->local_result.power.writeOp.dynamic *= .55;
	}

	/**
	 * Operand collectors (32-bit wide, 8 entry banks )
	 */
	interface_ip.is_cache            = false;
	interface_ip.pure_cam            = false;
	interface_ip.pure_ram            = true;
	interface_ip.line_sz             = 4;
	interface_ip.cache_sz            = 8*4;
	interface_ip.assoc               = 1;
	interface_ip.nbanks              = 1;
	// NOTE(review): every other array in this constructor uses line_sz*8 here;
	// confirm that an output width of 4 (not 32) is intended.
	interface_ip.out_w               = interface_ip.line_sz;
	interface_ip.access_mode         = 1;
	interface_ip.throughput          = 1.0/(clockRate);
	interface_ip.latency             = 1.0/(clockRate);
	interface_ip.obj_func_dyn_energy = 0;
	interface_ip.obj_func_dyn_power  = 0;
	interface_ip.obj_func_leak_power = 1;
	interface_ip.obj_func_cycle_t    = 0;
	interface_ip.num_rw_ports        = 0;//this is the transfer port for saving/restoring states when exceptions happen.
	interface_ip.num_rd_ports        = 1;//2*coredynp.peak_issueW;
	interface_ip.num_wr_ports        = 1;//coredynp.peak_issueW;
	interface_ip.num_se_rd_ports     = 0;
	OPC = new ArrayST(&interface_ip, "Operand collectors", Core_device, coredynp.opt_local, coredynp.core_ty);

	OPC->area.set_area(OPC->area.get_area() + OPC->local_result.area*coredynp.num_pipelines*cdb_overhead);
	area.set_area(area.get_area() + OPC->local_result.area);

	/********
	 * For GPGPUSim (Syed Gilani)
	 * Do not include FRF in final results for GPU. Only model the IRF
	 ********/

	//**********************************FRF***************************************
	// The array is still constructed, but its area is not accumulated and
	// fp_regfile_height is forced to 0 below.
	int data                         = coredynp.fp_data_width;
	interface_ip.is_cache            = false;
	interface_ip.pure_cam            = false;
	interface_ip.pure_ram            = true;
	interface_ip.line_sz             = int(ceil(data/32.0))*4;
	interface_ip.cache_sz            = coredynp.num_FRF_entry*interface_ip.line_sz;
	interface_ip.assoc               = 1;
	interface_ip.nbanks              = 1;
	interface_ip.out_w               = interface_ip.line_sz*8;
	interface_ip.access_mode         = 1;
	interface_ip.throughput          = 1.0/clockRate;
	interface_ip.latency             = 1.0/clockRate;
	interface_ip.obj_func_dyn_energy = 0;
	interface_ip.obj_func_dyn_power  = 0;
	interface_ip.obj_func_leak_power = 0;
	interface_ip.obj_func_cycle_t    = 1;
	interface_ip.num_rw_ports        = 1;//this is the transfer port for saving/restoring states when exceptions happen.
	interface_ip.num_rd_ports        = 2*XML->sys.core[ithCore].issue_width;
	interface_ip.num_wr_ports        = XML->sys.core[ithCore].issue_width;
	interface_ip.num_se_rd_ports     = 0;
	FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty);

	// Heights used by EXECU to size the bypass buses.
	// Since an EXU is associated with each pipeline, the cdb should not have longer length.
	int_regfile_height = IRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead);
	fp_regfile_height  = 0;

	if (coredynp.regWindowing)
	{
		//*********************************REG_WIN************************************
		data                             = coredynp.int_data_width; //ECC, and usually 2 regs are transferred together during window shifting. Niagara Mega cell
		interface_ip.is_cache            = false;
		interface_ip.pure_cam            = false;
		interface_ip.pure_ram            = true;
		interface_ip.line_sz             = int(ceil(data/8.0));
		// Sized from the IRF capacity that was actually given to the array model.
		interface_ip.cache_sz            = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads;
		interface_ip.assoc               = 1;
		interface_ip.nbanks              = 1;
		interface_ip.out_w               = interface_ip.line_sz*8;
		interface_ip.access_mode         = 1;
		interface_ip.throughput          = 4.0/clockRate;
		interface_ip.latency             = 4.0/clockRate;
		interface_ip.obj_func_dyn_energy = 0;
		interface_ip.obj_func_dyn_power  = 0;
		interface_ip.obj_func_leak_power = 0;
		interface_ip.obj_func_cycle_t    = 1;
		interface_ip.num_rw_ports        = 1;//this is the transfer port for saving/restoring states when exceptions happen.
		interface_ip.num_rd_ports        = 0;
		interface_ip.num_wr_ports        = 0;
		interface_ip.num_se_rd_ports     = 0;
		RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty);
		RFWIN->area.set_area(RFWIN->area.get_area() + RFWIN->local_result.area*coredynp.num_pipelines);
		area.set_area(area.get_area() + RFWIN->local_result.area*coredynp.num_pipelines);
	}
 }

/*
 * Execution unit of one core: owns the register-file unit, the scheduler, the
 * functional units (ALU always, FPU and MUL when configured) and the bypass
 * (result broadcast) interconnects.
 *
 *   XML_interface  parsed XML configuration; values are read from sys and sys.core[ithCore_]
 *   ithCore_       index of the core being modelled
 *   interface_ip_  parameter set; copied into interface_ip, whose wire fields are then modified
 *   lsq_height_    height of the load/store queue, used in the bypass bus lengths
 *   dyn_p_         core parameters (core type, scheduler type, unit counts, widths, ...)
 *   exClockRate    clock rate handed to the register-file unit and the functional units
 *   exist_         when false nothing is built and all component pointers stay null
 *
 * Area bookkeeping: the functional units, register-file unit and scheduler
 * are added to this component's area. Bypass buses are accumulated in
 * bypass.area only; they are not added to area here.
 */
EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_,  double exClockRate, bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 lsq_height(lsq_height_),
 coredynp(dyn_p_),
 rfu(0),
 scheu(0),
 fp_u(0),
 exeu(0),
 mul(0),
 int_bypass(0),
 intTagBypass(0),
 int_mul_bypass(0),
 intTag_mul_Bypass(0),
 fp_bypass(0),
 fpTagBypass(0),
 exist(exist_),
 rf_fu_clockRate(exClockRate)
{
	  if (!exist) return;

	  clockRate     = coredynp.clockRate;
	  executionTime = coredynp.executionTime;

	  // Sub-units that are always present.
	  rfu   = new RegFU(XML, ithCore, &interface_ip,coredynp,exClockRate);
	  scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp);
	  exeu  = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU,exClockRate);
	  area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() );

	  // Optional functional units.
	  if (coredynp.num_fpus >0)
	  {
		  fp_u  = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU, exClockRate);
		  area.set_area(area.get_area()+ fp_u->area.get_area());
	  }
	  if (coredynp.num_muls >0)
	  {
		  mul   = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL, exClockRate);
		  area.set_area(area.get_area()+ mul->area.get_area());
	  }

	  /*
	   * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast
	   * integer by pass has two paths and fp has 3 paths.
	   * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus
	   */
	  // Wire settings shared by every interconnect created below.
	  if (XML->sys.Embedded)
	  {
		  interface_ip.wt               = Global_30;
		  interface_ip.wire_is_mat_type = 0;
		  interface_ip.wire_os_mat_type = 0;
		  interface_ip.throughput       = 1.0/clockRate;
		  interface_ip.latency          = 1.0/clockRate;
	  }
	  else
	  {
		  interface_ip.wt               = Global;
		  interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used
		  interface_ip.wire_os_mat_type = 2;
		  interface_ip.throughput       = 10.0/clockRate; //Do not care
		  interface_ip.latency          = 10.0/clockRate;
	  }

	  if (coredynp.core_ty==Inorder)
	  {
		  // In-order: data bus spans register file, ALU and LSQ; the tag bus
		  // additionally spans the instruction window.
		  int_bypass   = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32),
				  rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
				  false, 1.0, coredynp.opt_local, coredynp.core_ty);
		  bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area());
		  intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
				  rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
				  false, 1.0, coredynp.opt_local, coredynp.core_ty);
		  bypass.area.set_area(bypass.area.get_area()  +intTagBypass->area.get_area());

		  if (coredynp.num_muls>0)
		  {
			  // NOTE(review): these two lengths use fp_regfile_height, whereas the
			  // out-of-order branches use int_regfile_height for the multiplier
			  // buses -- confirm which is intended.
			  int_mul_bypass     = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
					  rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
					  false, 1.0, coredynp.opt_local, coredynp.core_ty);
			  bypass.area.set_area(bypass.area.get_area()  +int_mul_bypass->area.get_area());
			  intTag_mul_Bypass  = new interconnect("Mul Bypass tag"  , Core_device, 1, 1, coredynp.perThreadState,
					  rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
					  false, 1.0, coredynp.opt_local, coredynp.core_ty);
			  bypass.area.set_area(bypass.area.get_area()  +intTag_mul_Bypass->area.get_area());
		  }

		  // FP bypass buses are not built for in-order cores in this variant
		  // (fp_bypass and fpTagBypass stay null).
	  } /* if (coredynp.core_ty==Inorder) */
	  else
	  {//OOO
		  if (coredynp.scheu_ty==PhysicalRegFile)
		  {
			  /* For physical register based OOO,
			   * data broadcast interconnects cover across functional units, lsq, inst windows and register files,
			   * while tag broadcast interconnects also cover across ROB
			   */
			  int_bypass   = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
					            rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
								false, 1.0, coredynp.opt_local, coredynp.core_ty);
			  bypass.area.set_area(bypass.area.get_area()  +int_bypass->area.get_area());
			  intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
					            rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
								false, 1.0, coredynp.opt_local, coredynp.core_ty);
			  // Fix: the integer tag bus was the only bypass bus whose area was
			  // never accumulated into bypass.area.
			  bypass.area.set_area(bypass.area.get_area()  +intTagBypass->area.get_area());

			  if (coredynp.num_muls>0)
			  {
				  int_mul_bypass   = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
										rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
										false, 1.0, coredynp.opt_local, coredynp.core_ty);
				  intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
										rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
										false, 1.0, coredynp.opt_local, coredynp.core_ty);
				  bypass.area.set_area(bypass.area.get_area()  +int_mul_bypass->area.get_area());
				  bypass.area.set_area(bypass.area.get_area()  +intTag_mul_Bypass->area.get_area());
			  }

			  if (coredynp.num_fpus>0)
			  {
				  fp_bypass    = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
								  rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
								  false, 1.0, coredynp.opt_local, coredynp.core_ty);
				  fpTagBypass  = new interconnect("FP Bypass tag"  , Core_device, 1, 1, coredynp.phy_freg_width,
								  rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
								  false, 1.0, coredynp.opt_local, coredynp.core_ty);
				  bypass.area.set_area(bypass.area.get_area()  +fp_bypass->area.get_area());
				  bypass.area.set_area(bypass.area.get_area()  +fpTagBypass->area.get_area());
			  }
		  }
		  else
		  {
             /*
              * In RS based processor both data and tag are broadcast together,
              * covering functional units, lsq, inst windows, register files, and ROBs
              */
			  int_bypass   = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
					            rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
								  false, 1.0, coredynp.opt_local, coredynp.core_ty);
			  intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
					            rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
								  false, 1.0, coredynp.opt_local, coredynp.core_ty);
			  bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
			  bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
			  if (coredynp.num_muls>0)
			  {
				  int_mul_bypass   = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
						            rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
									  false, 1.0, coredynp.opt_local, coredynp.core_ty);
				  intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
						            rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
									  false, 1.0, coredynp.opt_local, coredynp.core_ty);
				  bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
				  bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
			  }

			  if (coredynp.num_fpus>0)
			  {
				  fp_bypass    = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
						  rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
						  false, 1.0, coredynp.opt_local, coredynp.core_ty);
				  fpTagBypass  = new interconnect("FP Bypass tag"  , Core_device, 1, 1, coredynp.phy_freg_width,
						  rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
						  false, 1.0, coredynp.opt_local, coredynp.core_ty);
				  bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
				  bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
			  }
		  } /* else */
	  } /* else */

	  // The bypass area is deliberately left out of this component's total.
	  area.set_area(area.get_area()/*+ bypass.area.get_area()*/);
}

/*
 * Builds the register-renaming unit model of core `ithCore_`.
 *
 * XML_interface : parsed configuration; per-core sizes/widths are read from XML->sys.core[ithCore].
 * ithCore_      : index of the core being modelled.
 * interface_ip_ : array-model input template; it is copied into the member interface_ip, which is
 *                 then overwritten before each ArrayST is created.
 * dyn_p_        : derived core parameters (widths, clock rate, renaming/scheduler type, ...).
 * exist_        : when false the constructor returns right after member initialisation and
 *                 every component pointer stays null.
 *
 * What gets built:
 *   - OOO + PhysicalRegFile   : iFRAT/fFRAT (RAM or CAM based, selected by rm_ty), iRRAT/fRRAT,
 *                               ifreeL/ffreeL, idcl/fdcl.
 *   - OOO + ReservationStation: iFRAT/fFRAT (RAM or CAM based), one unified free list (ifreeL),
 *                               idcl/fdcl.
 *   - Inorder with issueW > 1 : idcl/fdcl only.
 * For every array, local_result.area * number_hardware_threads is added to the array's area and
 * the resulting array area is accumulated into this unit's `area`.
 */
RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 iFRAT(0),
 fFRAT(0),
 iRRAT(0),
 fRRAT(0),
 ifreeL(0),
 ffreeL(0),
 idcl(0),
 fdcl(0),
 RAHT(0),
 exist(exist_)
 {
	/*
	 * Although renaming logic may be used in in-order processors,
	 * McPAT assumes no renaming logic is used since the performance gain is very limited and
	 * the only major in-order processor with renaming logic is Itanium,
	 * which is a VLIW processor and different from McPAT's current model.
	 * Physical-register-based OOO must have a Dual-RAT architecture or an equivalent structure.
	 * FRAT: FrontRAT, RRAT: RetireRAT; the i/f prefixes mean int and fp.
	 * RAT for all renaming logic: randomly accessible checkpointing is used, but it is only
	 * updated when an instruction retires.
	 * FRAT will be read twice and written once per instruction;
	 * RRAT will be written once per instruction when committing and read out entirely on a context switch.
	 * Checkpointing is implicit.
	 * Renaming logic is duplicated for each hardware thread.
	 *
	 * No Dual-RAT is needed in RS-based OOO processors;
	 * however, the RAT needs to do an associative search when an instruction commits and the ROB
	 * releases the entry, to make sure all the renamings associated with the ROB entry to be
	 * released are updated at the same time.
	 * RAM scheme has #archi-reg entries, each holding a phy reg tag;
	 * CAM scheme has #phy-reg entries, each holding an archi reg tag.
	 *
	 * Both RAM and CAM have the same DCL.
	 */
	if (!exist) return;
	int  tag, data, out_w;
//	interface_ip.wire_is_mat_type = 0;
//	interface_ip.wire_os_mat_type = 0;
//	interface_ip.wt               = Global_30;
	clockRate = coredynp.clockRate;
	executionTime = coredynp.executionTime;
    if (coredynp.core_ty==OOO)
    {
	//integer pipeline
	if (coredynp.scheu_ty==PhysicalRegFile)
	{
		if (coredynp.rm_ty ==RAMbased)
		{	  //FRAT with global checkpointing (GCs); please see the tech report for detailed explanations
			// Line size and output width are hard-coded here; the formula-based values are kept in the trailing comments.
			data							 = 33;//int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
//			data							 = int(ceil(coredynp.phy_ireg_width/8.0));
			out_w                            = 1;//int(ceil(coredynp.phy_ireg_width/8.0));
			interface_ip.is_cache			 = false;
			interface_ip.pure_cam            = false;
			interface_ip.pure_ram            = true;
			interface_ip.line_sz             = data;
			interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
			interface_ip.assoc               = 1;
			interface_ip.nbanks              = 1;
			interface_ip.out_w               = out_w*8;
			interface_ip.access_mode         = 2;
			interface_ip.throughput          = 1.0/clockRate;
			interface_ip.latency             = 1.0/clockRate;
			interface_ip.obj_func_dyn_energy = 0;
			interface_ip.obj_func_dyn_power  = 0;
			interface_ip.obj_func_leak_power = 0;
			interface_ip.obj_func_cycle_t    = 1;
			interface_ip.num_rw_ports    = 1;//the extra one port is for GCs
			interface_ip.num_rd_ports    = 2*coredynp.decodeW;
			interface_ip.num_wr_ports    = coredynp.decodeW;
			interface_ip.num_se_rd_ports = 0;
			iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
			iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
			area.set_area(area.get_area()+ iFRAT->area.get_area());

//			//RAHT According to Intel, combine GC with FRAT is very costly.
//			data							 = int(ceil(coredynp.phy_ireg_width/8.0)*coredynp.num_IRF_entry);
//			out_w                            = data;
//			interface_ip.is_cache			 = false;
//			interface_ip.pure_cam            = false;
//			interface_ip.pure_ram            = true;
//			interface_ip.line_sz             = data;
//			interface_ip.cache_sz            = data*coredynp.globalCheckpoint;
//			interface_ip.assoc               = 1;
//			interface_ip.nbanks              = 1;
//			interface_ip.out_w               = out_w*8;
//			interface_ip.access_mode         = 0;
//			interface_ip.throughput          = 1.0/clockRate;
//			interface_ip.latency             = 1.0/clockRate;
//			interface_ip.obj_func_dyn_energy = 0;
//			interface_ip.obj_func_dyn_power  = 0;
//			interface_ip.obj_func_leak_power = 0;
//			interface_ip.obj_func_cycle_t    = 1;
//			interface_ip.num_rw_ports    = 1;//the extra one port is for GCs
//			interface_ip.num_rd_ports    = 2*coredynp.decodeW;
//			interface_ip.num_wr_ports    = coredynp.decodeW;
//			interface_ip.num_se_rd_ports = 0;
//			iFRAT = new ArrayST(&interface_ip, "Int FrontRAT");
//			iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
//			area.set_area(area.get_area()+ iFRAT->area.get_area());

			//FRAT floating point
			data							 = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
			out_w                            = int(ceil(coredynp.phy_freg_width/8.0));
			interface_ip.is_cache			 = false;
			interface_ip.pure_cam            = false;
			interface_ip.pure_ram            = true;
			interface_ip.line_sz             = data;
			interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
			interface_ip.assoc               = 1;
			interface_ip.nbanks              = 1;
			interface_ip.out_w               = out_w*8;
			interface_ip.access_mode         = 2;
			interface_ip.throughput          = 1.0/clockRate;
			interface_ip.latency             = 1.0/clockRate;
			interface_ip.obj_func_dyn_energy = 0;
			interface_ip.obj_func_dyn_power  = 0;
			interface_ip.obj_func_leak_power = 0;
			interface_ip.obj_func_cycle_t    = 1;
			interface_ip.num_rw_ports    = 1;//the extra one port is for GCs
			interface_ip.num_rd_ports    = 2*coredynp.fp_decodeW;
			interface_ip.num_wr_ports    = coredynp.fp_decodeW;
			interface_ip.num_se_rd_ports = 0;
			// NOTE(review): the name string still says "Int" for this FP array (same for the other
			// FP arrays below); left untouched because it is a runtime string - confirm it is only a label.
			fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
			fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
			area.set_area(area.get_area()+ fFRAT->area.get_area());

		}
		else if (coredynp.rm_ty ==CAMbased)
		{
			//FRAT
			tag							     = coredynp.arch_ireg_width;
			data							 = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
			out_w                            = int(ceil (coredynp.arch_ireg_width/8.0));
			interface_ip.is_cache			 = true;
			interface_ip.pure_cam            = false;
			interface_ip.pure_ram            = false;
			interface_ip.line_sz             = data;
			interface_ip.cache_sz            = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
			interface_ip.assoc               = 0;
			interface_ip.nbanks              = 1;
			interface_ip.out_w               = out_w*8;
			interface_ip.specific_tag        = 1;
			interface_ip.tag_w               = tag;
			interface_ip.access_mode         = 2;
			interface_ip.throughput          = 1.0/clockRate;
			interface_ip.latency             = 1.0/clockRate;
			interface_ip.obj_func_dyn_energy = 0;
			interface_ip.obj_func_dyn_power  = 0;
			interface_ip.obj_func_leak_power = 0;
			interface_ip.obj_func_cycle_t    = 1;
			interface_ip.num_rw_ports    = 1;//for GCs
			interface_ip.num_rd_ports    = coredynp.decodeW;
			interface_ip.num_wr_ports    = coredynp.decodeW;
			interface_ip.num_se_rd_ports = 0;
			interface_ip.num_search_ports= 2*coredynp.decodeW;
			iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
			iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
			area.set_area(area.get_area()+ iFRAT->area.get_area());

			//FRAT for FP
			tag							     = coredynp.arch_freg_width;
			data							 = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
			out_w                            = int(ceil (coredynp.arch_freg_width/8.0));
			interface_ip.is_cache			 = true;
			interface_ip.pure_cam            = false;
			interface_ip.pure_ram            = false;
			interface_ip.line_sz             = data;
			interface_ip.cache_sz            = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
			interface_ip.assoc               = 0;
			interface_ip.nbanks              = 1;
			interface_ip.out_w               = out_w*8;
			interface_ip.specific_tag        = 1;
			interface_ip.tag_w               = tag;
			interface_ip.access_mode         = 2;
			interface_ip.throughput          = 1.0/clockRate;
			interface_ip.latency             = 1.0/clockRate;
			interface_ip.obj_func_dyn_energy = 0;
			interface_ip.obj_func_dyn_power  = 0;
			interface_ip.obj_func_leak_power = 0;
			interface_ip.obj_func_cycle_t    = 1;
			interface_ip.num_rw_ports    = 1;//for GCs
			interface_ip.num_rd_ports    = coredynp.fp_decodeW;
			interface_ip.num_wr_ports    = coredynp.fp_decodeW;
			interface_ip.num_se_rd_ports = 0;
			interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
			fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
			fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
			area.set_area(area.get_area()+ fFRAT->area.get_area());

		}

		//RRAT is always RAM based, does not have GCs, and is used only to record the latest non-speculative mapping
		data							 = int(ceil(coredynp.phy_ireg_width/8.0));
		interface_ip.is_cache			 = false;
		interface_ip.pure_cam            = false;
		interface_ip.pure_ram            = true;
		interface_ip.line_sz             = data;
		interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2;//HACK to make it at least 64B
		interface_ip.assoc               = 1;
		interface_ip.nbanks              = 1;
		interface_ip.out_w               = interface_ip.line_sz*8;
		interface_ip.access_mode         = 1;
		interface_ip.throughput          = 1.0/clockRate;
		interface_ip.latency             = 1.0/clockRate;
		interface_ip.obj_func_dyn_energy = 0;
		interface_ip.obj_func_dyn_power  = 0;
		interface_ip.obj_func_leak_power = 0;
		interface_ip.obj_func_cycle_t    = 1;
		interface_ip.num_rw_ports    = 0;
		interface_ip.num_rd_ports    = XML->sys.core[ithCore].commit_width;
		interface_ip.num_wr_ports    = XML->sys.core[ithCore].commit_width;
		interface_ip.num_se_rd_ports = 0;
		iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
		iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
		area.set_area(area.get_area()+ iRRAT->area.get_area());

		//RRAT for FP
		data							 = int(ceil(coredynp.phy_freg_width/8.0));
		interface_ip.is_cache			 = false;
		interface_ip.pure_cam            = false;
		interface_ip.pure_ram            = true;
		interface_ip.line_sz             = data;
		interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2;//HACK to make it at least 64B
		interface_ip.assoc               = 1;
		interface_ip.nbanks              = 1;
		interface_ip.out_w               = interface_ip.line_sz*8;
		interface_ip.access_mode         = 1;
		interface_ip.throughput          = 1.0/clockRate;
		interface_ip.latency             = 1.0/clockRate;
		interface_ip.obj_func_dyn_energy = 0;
		interface_ip.obj_func_dyn_power  = 0;
		interface_ip.obj_func_leak_power = 0;
		interface_ip.obj_func_cycle_t    = 1;
		interface_ip.num_rw_ports    = 0;
		// NOTE(review): the FP RRAT ports use fp_decodeW whereas the int RRAT uses commit_width - confirm intended.
		interface_ip.num_rd_ports    = coredynp.fp_decodeW;
		interface_ip.num_wr_ports    = coredynp.fp_decodeW;
		interface_ip.num_se_rd_ports = 0;
		fRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
		fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
		area.set_area(area.get_area()+ fRRAT->area.get_area());

		//Freelist of renaming unit is always RAM based
		//Recycling happens at two places: 1) when DCL detects WAW, the phy registers/ROB entries recycle directly into the freelist
		// 2) when an instruction commits, the phy registers/ROB entries need to be recycled.
		//therefore num_wr port = decode-1 (-1 means at least one phy reg will be used for the current renaming group) + commit width
		data							 = int(ceil(coredynp.phy_ireg_width/8.0));
		interface_ip.is_cache			 = false;
		interface_ip.pure_cam            = false;
		interface_ip.pure_ram            = true;
		interface_ip.line_sz             = data;
		interface_ip.cache_sz            = data*coredynp.num_ifreelist_entries;
		interface_ip.assoc               = 1;
		interface_ip.nbanks              = 1;
		interface_ip.out_w               = interface_ip.line_sz*8;
		interface_ip.access_mode         = 1;
		interface_ip.throughput          = 1.0/clockRate;
		interface_ip.latency             = 1.0/clockRate;
		interface_ip.obj_func_dyn_energy = 0;
		interface_ip.obj_func_dyn_power  = 0;
		interface_ip.obj_func_leak_power = 0;
		interface_ip.obj_func_cycle_t    = 1;
		interface_ip.num_rw_ports    = 1;//TODO
		interface_ip.num_rd_ports    = coredynp.decodeW;
		interface_ip.num_wr_ports    = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width;
		//every cycle, (coredynp.decodeW -1) insts may need to send back their dest tags, commitW insts need to update freelist buffers
		interface_ip.num_se_rd_ports = 0;
		ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
		ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
		area.set_area(area.get_area()+ ifreeL->area.get_area());

		//freelist for FP
		data							 = int(ceil(coredynp.phy_freg_width/8.0));
		interface_ip.is_cache			 = false;
		interface_ip.pure_cam            = false;
		interface_ip.pure_ram            = true;
		interface_ip.line_sz             = data;
		interface_ip.cache_sz            = data*coredynp.num_ffreelist_entries;
		interface_ip.assoc               = 1;
		interface_ip.nbanks              = 1;
		interface_ip.out_w               = interface_ip.line_sz*8;
		interface_ip.access_mode         = 1;
		interface_ip.throughput          = 1.0/clockRate;
		interface_ip.latency             = 1.0/clockRate;
		interface_ip.obj_func_dyn_energy = 0;
		interface_ip.obj_func_dyn_power  = 0;
		interface_ip.obj_func_leak_power = 0;
		interface_ip.obj_func_cycle_t    = 1;
		interface_ip.num_rw_ports    = 1;
		interface_ip.num_rd_ports    = coredynp.fp_decodeW;
		interface_ip.num_wr_ports    = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width;
		interface_ip.num_se_rd_ports = 0;
		ffreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
		ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
		area.set_area(area.get_area()+ ffreeL->area.get_area());

		idcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
		fdcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);

	}
	else if (coredynp.scheu_ty==ReservationStation){
		if (coredynp.rm_ty ==RAMbased){
			/*
			 * The RAT needs to do an associative search when an instruction commits and the ROB releases the entry,
			 * to make sure all the renamings associated with the ROB entry to be released are updated to the ARF at the same time.
			 * A RAM based RAT for RS based OOO does not save the search operations. Its advantage is having fewer entries than
			 * a CAM based RAT, so it is more scalable as the number of ROB/physical regs increases.
			 */
			// NOTE(review): `tag` is assigned in this branch but specific_tag/tag_w are not set here,
			// so interface_ip keeps whatever tag settings it already had - confirm intended.
			tag							     = coredynp.phy_ireg_width;
			data							 = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
			out_w                            = int(ceil(coredynp.phy_ireg_width/8.0));
			interface_ip.is_cache			 = true;
			interface_ip.pure_cam            = false;
			interface_ip.pure_ram            = false;
			interface_ip.line_sz             = data;
			interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
			interface_ip.assoc               = 0;
			interface_ip.nbanks              = 1;
			interface_ip.out_w               = out_w*8;
			interface_ip.access_mode         = 2;
			interface_ip.throughput          = 1.0/clockRate;
			interface_ip.latency             = 1.0/clockRate;
			interface_ip.obj_func_dyn_energy = 0;
			interface_ip.obj_func_dyn_power  = 0;
			interface_ip.obj_func_leak_power = 0;
			interface_ip.obj_func_cycle_t    = 1;
			interface_ip.num_rw_ports    = 1;//the extra one port is for GCs
			interface_ip.num_rd_ports    = 2*coredynp.decodeW;
			interface_ip.num_wr_ports    = coredynp.decodeW;
			interface_ip.num_se_rd_ports = 0;
			interface_ip.num_search_ports= coredynp.commitW;//TODO
			iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
			iFRAT->local_result.adjust_area();
			iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
			area.set_area(area.get_area()+ iFRAT->area.get_area());

			//FP
			tag							     = coredynp.phy_freg_width;
			data							 = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
			out_w                            = int(ceil(coredynp.phy_freg_width/8.0));
			interface_ip.is_cache			 = true;
			interface_ip.pure_cam            = false;
			interface_ip.pure_ram            = false;
			interface_ip.line_sz             = data;
			interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
			interface_ip.assoc               = 0;
			interface_ip.nbanks              = 1;
			interface_ip.out_w               = out_w*8;
			interface_ip.access_mode         = 2;
			interface_ip.throughput          = 1.0/clockRate;
			interface_ip.latency             = 1.0/clockRate;
			interface_ip.obj_func_dyn_energy = 0;
			interface_ip.obj_func_dyn_power  = 0;
			interface_ip.obj_func_leak_power = 0;
			interface_ip.obj_func_cycle_t    = 1;
			interface_ip.num_rw_ports    = 1;//the extra one port is for GCs
			interface_ip.num_rd_ports    = 2*coredynp.fp_decodeW;
			interface_ip.num_wr_ports    = coredynp.fp_decodeW;
			interface_ip.num_se_rd_ports = 0;
			interface_ip.num_search_ports= coredynp.fp_decodeW;//actually is fp commit width
			fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
			fFRAT->local_result.adjust_area();
			fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
			area.set_area(area.get_area()+ fFRAT->area.get_area());

		}
		else if (coredynp.rm_ty ==CAMbased)
		{
			//FRAT
			tag							     = coredynp.arch_ireg_width;
			// Bits -> bytes: the division by 8 must cover the whole (tag + checkpoint) bit count,
			// exactly as in the PhysicalRegFile CAM branch above. Previously only the checkpoint
			// term was divided, which inflated the line size.
			data							 = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
			out_w                            = int(ceil (coredynp.arch_ireg_width/8.0));
			interface_ip.is_cache			 = true;
			interface_ip.pure_cam            = false;
			interface_ip.pure_ram            = false;
			interface_ip.line_sz             = data;
			interface_ip.cache_sz            = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
			interface_ip.assoc               = 0;
			interface_ip.nbanks              = 1;
			interface_ip.out_w               = out_w*8;
			interface_ip.specific_tag        = 1;
			interface_ip.tag_w               = tag;
			interface_ip.access_mode         = 2;
			interface_ip.throughput          = 1.0/clockRate;
			interface_ip.latency             = 1.0/clockRate;
			interface_ip.obj_func_dyn_energy = 0;
			interface_ip.obj_func_dyn_power  = 0;
			interface_ip.obj_func_leak_power = 0;
			interface_ip.obj_func_cycle_t    = 1;
			interface_ip.num_rw_ports    = 1;//for GCs
			interface_ip.num_rd_ports    = XML->sys.core[ithCore].decode_width;//0;TODO
			interface_ip.num_wr_ports    = XML->sys.core[ithCore].decode_width;
			interface_ip.num_se_rd_ports = 0;
			interface_ip.num_search_ports= 2*XML->sys.core[ithCore].decode_width;
			iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
			iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
			area.set_area(area.get_area()+ iFRAT->area.get_area());

			//FRAT for FP
			tag							     = coredynp.arch_freg_width;
			// Same bits -> bytes fix as for the integer FRAT above.
			data							 = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
			out_w                            = int(ceil (coredynp.arch_freg_width/8.0));
			interface_ip.is_cache			 = true;
			interface_ip.pure_cam            = false;
			interface_ip.pure_ram            = false;
			interface_ip.line_sz             = data;
			interface_ip.cache_sz            = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
			interface_ip.assoc               = 0;
			interface_ip.nbanks              = 1;
			interface_ip.out_w               = out_w*8;
			interface_ip.specific_tag        = 1;
			interface_ip.tag_w               = tag;
			interface_ip.access_mode         = 2;
			interface_ip.throughput          = 1.0/clockRate;
			interface_ip.latency             = 1.0/clockRate;
			interface_ip.obj_func_dyn_energy = 0;
			interface_ip.obj_func_dyn_power  = 0;
			interface_ip.obj_func_leak_power = 0;
			interface_ip.obj_func_cycle_t    = 1;
			interface_ip.num_rw_ports    = 1;//for GCs
			// NOTE(review): read ports use the integer decode_width while write/search ports use fp_decodeW - confirm intended.
			interface_ip.num_rd_ports    = XML->sys.core[ithCore].decode_width;//0;TODO;
			interface_ip.num_wr_ports    = coredynp.fp_decodeW;
			interface_ip.num_se_rd_ports = 0;
			interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
			fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
			fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
			area.set_area(area.get_area()+ fFRAT->area.get_area());

		}
		//No RRAT for RS based OOO
		//Freelist of renaming unit of RS based OOO is unified for both int and fp renaming units since the ROB is unified
		data							 = int(ceil(coredynp.phy_ireg_width/8.0));
		interface_ip.is_cache			 = false;
		interface_ip.pure_cam            = false;
		interface_ip.pure_ram            = true;
		interface_ip.line_sz             = data;
		interface_ip.cache_sz            = data*coredynp.num_ifreelist_entries;
		interface_ip.assoc               = 1;
		interface_ip.nbanks              = 1;
		interface_ip.out_w               = interface_ip.line_sz*8;
		interface_ip.access_mode         = 1;
		interface_ip.throughput          = 1.0/clockRate;
		interface_ip.latency             = 1.0/clockRate;
		interface_ip.obj_func_dyn_energy = 0;
		interface_ip.obj_func_dyn_power  = 0;
		interface_ip.obj_func_leak_power = 0;
		interface_ip.obj_func_cycle_t    = 1;
		interface_ip.num_rw_ports    = 1;//TODO
		interface_ip.num_rd_ports    = XML->sys.core[ithCore].decode_width;
		interface_ip.num_wr_ports    = XML->sys.core[ithCore].decode_width -1 + XML->sys.core[ithCore].commit_width;
		interface_ip.num_se_rd_ports = 0;
		ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
		ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
		area.set_area(area.get_area()+ ifreeL->area.get_area());

		idcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
		fdcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
	}

}
    if (coredynp.core_ty==Inorder&& coredynp.issueW>1)
    {
	  /* Dependency check logic is only present when decode (issue) width > 1.
	  *  A multiple-issue in-order processor can do without renaming, but DCL is a must.
	  */
	idcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
	fdcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
    }
}

/*
 * Assembles the model of core `ithCore_`: creates the optional private L2, derives the core
 * parameters, instantiates every functional unit, and sums the unit areas into the core area.
 * The pipeline-register area is split evenly over the units (five shares on an OOO core,
 * where the renaming unit takes one; four otherwise).
 */
Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_)
: XML(XML_interface),
  ithCore(ithCore_),
  interface_ip(*interface_ip_),
  ifu(0),
  lsu(0),
  mmu(0),
  exu(0),
  rnu(0),
  corepipe(0),
  undiffCore(0),
  l2cache(0)
{
	// Testing overrides (to be removed, added by syed) - kept disabled:
	//XML->sys.core[ithCore].simd_width=8;// (8)
	//XML->sys.core[ithCore].collector_units=4;// (4)
	//XML->sys.core[ithCore].core_clock_ratio=2.0;// (2.0)
	//XML->sys.core[ithCore].warp_size=32;// (32)

	IdleCoreEnergy    = 0;
	IdlePower_PerCore = 0;

	// The private L2 (if configured) is built first, before the core parameters are derived.
	if (XML->sys.Private_L2)
	{
		l2cache = new SharedCache(XML, ithCore, &interface_ip);
	}

//	interface_ip.wire_is_mat_type = 2;
//	interface_ip.wire_os_mat_type = 2;
//	interface_ip.wt               =Global_30;
	set_core_param();
	clockRate     = coredynp.clockRate;
	exClockRate   = clockRate*XML->sys.core[ithCore].core_clock_ratio;
	executionTime = coredynp.executionTime;

	const bool out_of_order = (coredynp.core_ty == OOO);

	// Initialise, compute and optimise the individual components.
	// The execution unit is handed the LSQ height of the load/store unit and the execution clock.
	ifu        = new InstFetchU(XML, ithCore, &interface_ip, coredynp);
	lsu        = new LoadStoreU(XML, ithCore, &interface_ip, coredynp);
	mmu        = new MemManU   (XML, ithCore, &interface_ip, coredynp);
	exu        = new EXECU     (XML, ithCore, &interface_ip, lsu->lsq_height, coredynp, exClockRate, true);
	undiffCore = new UndiffCore(XML, ithCore, &interface_ip, coredynp);
	if (out_of_order)
	{
		rnu = new RENAMINGU(XML, ithCore, &interface_ip, coredynp);
	}
	corepipe = new Pipeline(&interface_ip, coredynp);

	// Share of the pipeline area charged to each unit.
	const double sharing_units   = out_of_order ? 5.0 : 4.0;
	const double pipe_area_share = (corepipe->area.get_area()*coredynp.num_pipelines)/sharing_units;

	// rnu is only dereferenced on OOO cores, where it has been allocated above.
	const bool renaming_present = out_of_order && rnu->exist;
	if (renaming_present)
	{
		rnu->area.set_area(rnu->area.get_area() + pipe_area_share);
	}

	//area.set_area(area.get_area()+ corepipe->area.get_area());
	// Units that exist receive their pipeline share and are then added to the core area,
	// in the order: ifu, lsu, exu, mmu, rnu, undifferentiated core, private L2.
	if (ifu->exist)
	{
		ifu->area.set_area(ifu->area.get_area() + pipe_area_share);
		area.set_area(area.get_area() + ifu->area.get_area());
	}
	if (lsu->exist)
	{
		lsu->area.set_area(lsu->area.get_area() + pipe_area_share);
		area.set_area(area.get_area() + lsu->area.get_area());
	}
	if (exu->exist)
	{
		exu->area.set_area(exu->area.get_area() + pipe_area_share);
		area.set_area(area.get_area() + exu->area.get_area());
	}
	if (mmu->exist)
	{
		mmu->area.set_area(mmu->area.get_area() + pipe_area_share);
		area.set_area(area.get_area() + mmu->area.get_area());
	}
	if (renaming_present)
	{
		area.set_area(area.get_area() + rnu->area.get_area());
	}
	if (undiffCore->exist)
	{
		area.set_area(area.get_area() + undiffCore->area.get_area());
	}
	if (XML->sys.Private_L2)
	{
		area.set_area(area.get_area() + l2cache->area.get_area());
	}

//	//clock power (disabled)
//	clockNetwork.init_wire_external(is_default, &interface_ip);
//	clockNetwork.clk_area           =area*1.1;//10% of placement overhead. rule of thumb
//	clockNetwork.end_wiring_level   =5;//toplevel metal
//	clockNetwork.start_wiring_level =5;//toplevel metal
//	clockNetwork.num_regs           = corepipe.tot_stage_vector;
//	clockNetwork.optimize_wire();
}


/*
 * Computes the power of the branch predictor arrays (global, L1/L2 local, chooser, RAS).
 *
 * is_tdp == true : access counts come from the prediction width and the branch duty cycle;
 *                  results go to each array's `power` and are added to this unit's `power`.
 * is_tdp == false: access counts come from the XML statistics of core `ithCore`;
 *                  results go to each array's `rt_power` and are added to this unit's `rt_power`.
 *
 * Does nothing when the predictor does not exist.
 */
void BranchPredictor::computeEnergy(bool is_tdp)
{
	if (!exist) return;
	double r_access;
	double w_access;
	if (is_tdp)
	{
		r_access = coredynp.predictionW*coredynp.BR_duty_cycle;
		w_access = 0*coredynp.BR_duty_cycle;
		globalBPT->stats_t.readAc.access  = r_access;
		globalBPT->stats_t.writeAc.access = w_access;
		globalBPT->tdp_stats = globalBPT->stats_t;

		L1_localBPT->stats_t.readAc.access  = r_access;
		L1_localBPT->stats_t.writeAc.access = w_access;
		L1_localBPT->tdp_stats = L1_localBPT->stats_t;

		L2_localBPT->stats_t.readAc.access  = r_access;
		L2_localBPT->stats_t.writeAc.access = w_access;
		L2_localBPT->tdp_stats = L2_localBPT->stats_t;

		chooser->stats_t.readAc.access  = r_access;
		chooser->stats_t.writeAc.access = w_access;
		chooser->tdp_stats = chooser->stats_t;

		RAS->stats_t.readAc.access  = r_access;
		RAS->stats_t.writeAc.access = w_access;
		RAS->tdp_stats = RAS->stats_t;
	}
	else
	{
		//The resolution of BPT accesses is coarse, but this is
		//because most simulators cannot track finer grained details
		r_access = XML->sys.core[ithCore].branch_instructions;
		w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;//10% of BR will flip internal bits//0
		globalBPT->stats_t.readAc.access  = r_access;
		globalBPT->stats_t.writeAc.access = w_access;
		globalBPT->rtp_stats = globalBPT->stats_t;

		L1_localBPT->stats_t.readAc.access  = r_access;
		L1_localBPT->stats_t.writeAc.access = w_access;
		L1_localBPT->rtp_stats = L1_localBPT->stats_t;

		L2_localBPT->stats_t.readAc.access  = r_access;
		L2_localBPT->stats_t.writeAc.access = w_access;
		L2_localBPT->rtp_stats = L2_localBPT->stats_t;

		chooser->stats_t.readAc.access  = r_access;
		chooser->stats_t.writeAc.access = w_access;
		chooser->rtp_stats = chooser->stats_t;

		// The RAS is read and written once per function call.
		RAS->stats_t.readAc.access  = XML->sys.core[ithCore].function_calls;
		RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls;
		RAS->rtp_stats = RAS->stats_t;
	}

	globalBPT->power_t.reset();
	L1_localBPT->power_t.reset();
	L2_localBPT->power_t.reset();
	chooser->power_t.reset();
	RAS->power_t.reset();

	// Dynamic part: per-access read/write energy weighted by the access counts set above;
	// both contributions are accumulated into readOp.dynamic.
	globalBPT->power_t.readOp.dynamic   +=  globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access +
	            globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic;
	L1_localBPT->power_t.readOp.dynamic   +=  L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access +
	            L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic;

	L2_localBPT->power_t.readOp.dynamic   +=  L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access +
	            L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic;

	chooser->power_t.readOp.dynamic   +=  chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access +
	            chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic;
	RAS->power_t.readOp.dynamic   +=  RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access +
	            RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic;

	// Add the pppm_lkg-scaled term of each array (the RAS uses the multithread scaling vector).
	if (is_tdp)
	{
		globalBPT->power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
		L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
		L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
		chooser->power = chooser->power_t + chooser->local_result.power*pppm_lkg;
		RAS->power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;

		// The L2 local predictor is part of the total; it was previously computed but never summed.
		power = power + globalBPT->power + L1_localBPT->power + L2_localBPT->power + chooser->power + RAS->power;
	}
	else
	{
		globalBPT->rt_power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
		L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
		L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
		chooser->rt_power = chooser->power_t + chooser->local_result.power*pppm_lkg;
		RAS->rt_power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;
		// The L2 local predictor is part of the total; it was previously computed but never summed.
		rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + L2_localBPT->rt_power + chooser->rt_power + RAS->rt_power;
	}
}

/**
 * Emits the standard report for one branch-predictor structure: a title line,
 * then area, peak dynamic, subthreshold leakage, gate leakage and runtime
 * dynamic lines, followed by an empty line.
 *
 * The subthreshold figure is taken from longer_channel_leakage when
 * long_channel is set, otherwise from leakage.
 */
template <typename UnitT, typename RateT, typename TimeT>
static void print_bpt_unit_report(const std::string &title_pad, const std::string &field_pad,
		const char *title, UnitT *unit, bool long_channel,
		const RateT &clock_rate, const TimeT &exec_time)
{
	std::cout << title_pad << title << std::endl;
	std::cout << field_pad << "Area = " << unit->area.get_area()*1e-6 << " mm^2" << std::endl;
	std::cout << field_pad << "Peak Dynamic = " << unit->power.readOp.dynamic*clock_rate << " W" << std::endl;
	std::cout << field_pad << "Subthreshold Leakage = "
		<< (long_channel ? unit->power.readOp.longer_channel_leakage : unit->power.readOp.leakage)
		<< " W" << std::endl;
	std::cout << field_pad << "Gate Leakage = " << unit->power.readOp.gate_leakage << " W" << std::endl;
	std::cout << field_pad << "Runtime Dynamic = " << unit->rt_power.readOp.dynamic/exec_time << " W" << std::endl;
	std::cout << std::endl;
}

/**
 * Prints the per-structure breakdown of the branch predictor (global
 * predictor, L1/L2 local predictors, chooser and RAS) to stdout.
 *
 * @param indent  number of spaces before each section title; the value
 *                lines are indented by two further spaces.
 * @param plevel  print level; not consulted by this function.
 * @param is_tdp  when true the report is printed; when false nothing is
 *                printed (the runtime-only report is disabled).
 */
void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
	if (!exist) return;

	const std::string title_pad(indent, ' ');
	const std::string field_pad(indent+2, ' ');
	const bool long_channel = XML->sys.longer_channel_device;

	// The non-TDP variant of this report has no active output.
	if (!is_tdp) return;

	print_bpt_unit_report(title_pad, field_pad, "Global Predictor:",
			globalBPT, long_channel, clockRate, executionTime);

	// Group heading for the two local-predictor levels that follow.
	std::cout << title_pad << "Local Predictor:" << std::endl;
	print_bpt_unit_report(title_pad, field_pad, "L1_Local Predictor:",
			L1_localBPT, long_channel, clockRate, executionTime);
	print_bpt_unit_report(title_pad, field_pad, "L2_Local Predictor:",
			L2_localBPT, long_channel, clockRate, executionTime);

	print_bpt_unit_report(title_pad, field_pad, "Chooser:",
			chooser, long_channel, clockRate, executionTime);
	print_bpt_unit_report(title_pad, field_pad, "RAS:",
			RAS, long_channel, clockRate, executionTime);
}

/**
 * Computes the power of the instruction fetch unit: instruction cache (with
 * its miss, fill and prefetch buffers), instruction buffer, branch target
 * buffer / branch predictor (only when coredynp.predictionW > 0) and the
 * instruction decoders.
 *
 * @param is_tdp  true:  load per-structure peak activity, then accumulate the
 *                       results into `power` and each structure's `power`.
 *                false: reset `rt_power`, load the activity counters from the
 *                       XML statistics of core `ithCore`, then accumulate into
 *                       `rt_power` and each structure's `rt_power`.
 *
 * The runtime decoder figures multiply the per-access dynamic value that the
 * is_tdp branch stores in ID_*->power_t, so the runtime path relies on the
 * TDP path having been executed beforehand.
 */
void InstFetchU::computeEnergy(bool is_tdp)
{
	// Refreshed on every call, before the existence check.
	executionTime=XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);//Syed
	if (!exist) return;

	if (is_tdp)
	{
		//init stats for Peak
		icache.caches->stats_t.readAc.access  = icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle;
		icache.caches->stats_t.readAc.miss    = 0;
		icache.caches->stats_t.readAc.hit     = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
		icache.caches->tdp_stats = icache.caches->stats_t;

		icache.missb->stats_t.readAc.access  = icache.missb->stats_t.readAc.hit=  icache.missb->l_ip.num_search_ports;
		icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports;
		icache.missb->tdp_stats = icache.missb->stats_t;

		icache.ifb->stats_t.readAc.access  = icache.ifb->stats_t.readAc.hit=  icache.ifb->l_ip.num_search_ports;
		icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports;
		icache.ifb->tdp_stats = icache.ifb->stats_t;

		// The write stats use the prefetch buffer's own hit field and port
		// count (previously this line went through icache.ifb by mistake).
		icache.prefetchb->stats_t.readAc.access  = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports;
		icache.prefetchb->stats_t.writeAc.access = icache.prefetchb->stats_t.writeAc.hit= icache.prefetchb->l_ip.num_search_ports;
		icache.prefetchb->tdp_stats = icache.prefetchb->stats_t;

		IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width;
		IB->tdp_stats = IB->stats_t;

		if (coredynp.predictionW>0)
		{
			// NOTE(review): unlike the other structures, BTB->tdp_stats is
			// not snapshotted here -- confirm this is intentional.
			BTB->stats_t.readAc.access  = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses;
			BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses;
		}

		ID_inst->stats_t.readAc.access     = coredynp.decodeW;
		ID_operand->stats_t.readAc.access  = coredynp.decodeW;
		ID_misc->stats_t.readAc.access     = coredynp.decodeW;
		ID_inst->tdp_stats = ID_inst->stats_t;
		ID_operand->tdp_stats = ID_operand->stats_t;
		ID_misc->tdp_stats = ID_misc->stats_t;
	} /* if (is_tdp) */
	else
	{
		// Runtime totals are rebuilt from scratch on every runtime call.
		rt_power.reset();
		icache.rt_power.reset(); //Jingwen

		//init stats for Runtime Dynamic (RTP)
		icache.caches->stats_t.readAc.access  = XML->sys.core[ithCore].icache.read_accesses;
		icache.caches->stats_t.readAc.miss    = XML->sys.core[ithCore].icache.read_misses;
		icache.caches->stats_t.readAc.hit     = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
		icache.caches->rtp_stats = icache.caches->stats_t;

		// Every icache read miss is counted as one read and one write of
		// each of the three side buffers.
		icache.missb->stats_t.readAc.access  = icache.caches->stats_t.readAc.miss;
		icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
		icache.missb->rtp_stats = icache.missb->stats_t;

		icache.ifb->stats_t.readAc.access  = icache.caches->stats_t.readAc.miss;
		icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
		icache.ifb->rtp_stats = icache.ifb->stats_t;

		icache.prefetchb->stats_t.readAc.access  = icache.caches->stats_t.readAc.miss;
		icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
		icache.prefetchb->rtp_stats = icache.prefetchb->stats_t;

		IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions;
		IB->rtp_stats = IB->stats_t;

		if (coredynp.predictionW>0)
		{
			BTB->stats_t.readAc.access  = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions;
			BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions;
			BTB->rtp_stats = BTB->stats_t;
		}

		ID_inst->stats_t.readAc.access     = XML->sys.core[ithCore].total_instructions;
		ID_operand->stats_t.readAc.access  = XML->sys.core[ithCore].total_instructions;
		ID_misc->stats_t.readAc.access     = XML->sys.core[ithCore].total_instructions;
		ID_inst->rtp_stats = ID_inst->stats_t;
		ID_operand->rtp_stats = ID_operand->stats_t;
		ID_misc->rtp_stats = ID_misc->stats_t;
	}

	// Scratch accumulators for this call. The decoders' power_t is
	// deliberately NOT reset: it carries the per-access value saved by the
	// TDP branch below.
	icache.power_t.reset();
	IB->power_t.reset();
	if (coredynp.predictionW>0)
	{
		BTB->power_t.reset();
	}

	icache.power_t.readOp.dynamic	+= (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+
			//icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+
			icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel
			icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache
	icache.power_t.readOp.dynamic	+=  icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic +
			icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
	icache.power_t.readOp.dynamic	+=  icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic +
			icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic;
	icache.power_t.readOp.dynamic	+=  icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic +
			icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic;

	IB->power_t.readOp.dynamic   +=  IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access +
			IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic;

	if (coredynp.predictionW>0)
	{
		BTB->power_t.readOp.dynamic   +=  BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access +
				BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic;

		BPT->computeEnergy(is_tdp);
	}

	if (is_tdp)
	{
		icache.power = icache.power_t +
				(icache.caches->local_result.power +
				icache.missb->local_result.power +
				icache.ifb->local_result.power +
				icache.prefetchb->local_result.power)*pppm_lkg;

		IB->power = IB->power_t + IB->local_result.power*pppm_lkg;
		power     = power + icache.power + IB->power;
		if (coredynp.predictionW>0)
		{
			BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg;
			power     = power  + BTB->power + BPT->power;
		}

		// Save the unscaled dynamic value so the runtime branch can reuse it,
		// then scale the decoders' peak power by the peak access count.
		ID_inst->power_t.readOp.dynamic    = ID_inst->power.readOp.dynamic;
		ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic;
		ID_misc->power_t.readOp.dynamic    = ID_misc->power.readOp.dynamic;

		ID_inst->power.readOp.dynamic    *= ID_inst->tdp_stats.readAc.access;
		ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access;
		ID_misc->power.readOp.dynamic    *= ID_misc->tdp_stats.readAc.access;

		power = power + (ID_inst->power +
				ID_operand->power +
				ID_misc->power);
	} /* if (is_tdp) */
	else
	{
		icache.rt_power = icache.power_t +
				(icache.caches->local_result.power +
				icache.missb->local_result.power +
				icache.ifb->local_result.power +
				icache.prefetchb->local_result.power)*pppm_lkg;

		// Only the dynamic component of the instruction buffer's runtime
		// power is written here (the leakage-inclusive form is disabled).
		//IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg;
		IB->rt_power.readOp.dynamic = IB->local_result.power.readOp.dynamic * IB->rtp_stats.readAc.access;
		IB->rt_power.readOp.dynamic += IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access;
		rt_power     = rt_power + icache.rt_power + IB->rt_power;
		if (coredynp.predictionW>0)
		{
			BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg;
			rt_power     = rt_power + BTB->rt_power + BPT->rt_power;
		}

		// power_t.readOp.dynamic holds the value saved by the TDP branch.
		ID_inst->rt_power.readOp.dynamic    = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access;
		ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access;
		ID_misc->rt_power.readOp.dynamic    = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access;

		rt_power = rt_power + (ID_inst->rt_power +
				ID_operand->rt_power +
				ID_misc->rt_power);
	}
}

/**
 * Emits the standard report for one fetch-unit structure: a title line, then
 * area, peak dynamic, subthreshold leakage, gate leakage and runtime dynamic
 * lines, followed by an empty line.
 *
 * The subthreshold figure is taken from longer_channel_leakage when
 * long_channel is set, otherwise from leakage.
 */
template <typename UnitT, typename RateT, typename TimeT>
static void print_ifu_unit_report(const std::string &title_pad, const std::string &field_pad,
		const char *title, UnitT *unit, bool long_channel,
		const RateT &clock_rate, const TimeT &exec_time)
{
	std::cout << title_pad << title << std::endl;
	std::cout << field_pad << "Area = " << unit->area.get_area()*1e-6 << " mm^2" << std::endl;
	std::cout << field_pad << "Peak Dynamic = " << unit->power.readOp.dynamic*clock_rate << " W" << std::endl;
	std::cout << field_pad << "Subthreshold Leakage = "
		<< (long_channel ? unit->power.readOp.longer_channel_leakage : unit->power.readOp.leakage)
		<< " W" << std::endl;
	std::cout << field_pad << "Gate Leakage = " << unit->power.readOp.gate_leakage << " W" << std::endl;
	std::cout << field_pad << "Runtime Dynamic = " << unit->rt_power.readOp.dynamic/exec_time << " W" << std::endl;
	std::cout << std::endl;
}

/**
 * Prints the instruction fetch unit breakdown to stdout: instruction cache,
 * branch target buffer and branch predictor (only when
 * coredynp.predictionW > 0), instruction buffer and the combined
 * instruction decoder.
 *
 * @param indent  number of spaces before each section title; the value
 *                lines are indented by two further spaces.
 * @param plevel  print level; above 3 the branch predictor's own breakdown
 *                is printed as well, indented by four more spaces.
 * @param is_tdp  when true the report is printed; when false nothing is
 *                printed (the runtime-only report is disabled).
 */
void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
	if (!exist) return;

	const std::string title_pad(indent, ' ');
	const std::string field_pad(indent+2, ' ');
	const bool long_channel = XML->sys.longer_channel_device;

	// The non-TDP variant of this report has no active output.
	if (!is_tdp) return;

	print_ifu_unit_report(title_pad, field_pad, "Instruction Cache:",
			&icache, long_channel, clockRate, executionTime);

	if (coredynp.predictionW>0)
	{
		print_ifu_unit_report(title_pad, field_pad, "Branch Target Buffer:",
				BTB, long_channel, clockRate, executionTime);
		if (BPT->exist)
		{
			print_ifu_unit_report(title_pad, field_pad, "Branch Predictor:",
					BPT, long_channel, clockRate, executionTime);
			if (plevel>3)
			{
				BPT->displayEnergy(indent+4, plevel, is_tdp);
			}
		}
	}

	print_ifu_unit_report(title_pad, field_pad, "Instruction Buffer:",
			IB, long_channel, clockRate, executionTime);

	// The decoder is reported as the sum of its three parts; the area is
	// additionally scaled by the decode width.
	std::cout << title_pad << "Instruction Decoder:" << std::endl;
	std::cout << field_pad << "Area = "
		<< (ID_inst->area.get_area() +
			ID_operand->area.get_area() +
			ID_misc->area.get_area())*coredynp.decodeW*1e-6
		<< " mm^2" << std::endl;
	std::cout << field_pad << "Peak Dynamic = "
		<< (ID_inst->power.readOp.dynamic +
			ID_operand->power.readOp.dynamic +
			ID_misc->power.readOp.dynamic)*clockRate
		<< " W" << std::endl;
	std::cout << field_pad << "Subthreshold Leakage = ";
	if (long_channel)
	{
		std::cout << (ID_inst->power.readOp.longer_channel_leakage +
				ID_operand->power.readOp.longer_channel_leakage +
				ID_misc->power.readOp.longer_channel_leakage);
	}
	else
	{
		std::cout << (ID_inst->power.readOp.leakage +
				ID_operand->power.readOp.leakage +
				ID_misc->power.readOp.leakage);
	}
	std::cout << " W" << std::endl;
	std::cout << field_pad << "Gate Leakage = "
		<< (ID_inst->power.readOp.gate_leakage +
			ID_operand->power.readOp.gate_leakage +
			ID_misc->power.readOp.gate_leakage)
		<< " W" << std::endl;
	std::cout << field_pad << "Runtime Dynamic = "
		<< (ID_inst->rt_power.readOp.dynamic +
			ID_operand->rt_power.readOp.dynamic +
			ID_misc->rt_power.readOp.dynamic)/executionTime
		<< " W" << std::endl;
	std::cout << std::endl;
}

/**
 * Computes the power of the renaming unit.
 *
 * The function runs in three stages:
 *   1. load access counts into each structure's stats_t (peak port counts /
 *      decode width when is_tdp, XML counters of core `ithCore` otherwise)
 *      and snapshot them into tdp_stats or rtp_stats;
 *   2. turn those counts into dynamic values in each structure's power_t;
 *   3. add leakage and accumulate into `power` (is_tdp) or `rt_power`.
 *
 * Which structures take part depends on coredynp.core_ty, coredynp.scheu_ty
 * and coredynp.rm_ty: out-of-order cores use the front-end RATs and free
 * list(s), plus the retire RATs for the PhysicalRegFile scheme; other cores
 * only use the dependency-check logic (idcl/fdcl), and only when
 * coredynp.issueW > 1.
 *
 * @param is_tdp  true for the peak (TDP) computation, false for runtime.
 */
void RENAMINGU::computeEnergy(bool is_tdp)
{
	if (!exist) return;
	// Scratch multiplier vector filled by set_pppm() for the idcl/fdcl scaling.
	double pppm_t[4]    = {1,1,1,1};
	if (is_tdp)
	{//init stats for Peak
		if (coredynp.core_ty==OOO){
			if (coredynp.scheu_ty==PhysicalRegFile)
			{
				if (coredynp.rm_ty ==RAMbased)
				{
					iFRAT->stats_t.readAc.access   = iFRAT->l_ip.num_rd_ports;
					iFRAT->stats_t.writeAc.access  = iFRAT->l_ip.num_wr_ports;
					iFRAT->tdp_stats = iFRAT->stats_t;

					fFRAT->stats_t.readAc.access   = fFRAT->l_ip.num_rd_ports;
					fFRAT->stats_t.writeAc.access  = fFRAT->l_ip.num_wr_ports;
					fFRAT->tdp_stats = fFRAT->stats_t;

				}
				else if (coredynp.rm_ty ==CAMbased)
				{
					// CAM-based RATs are read through their search ports.
					iFRAT->stats_t.readAc.access   = iFRAT->l_ip.num_search_ports;
					iFRAT->stats_t.writeAc.access  = iFRAT->l_ip.num_wr_ports;
					iFRAT->tdp_stats = iFRAT->stats_t;

					fFRAT->stats_t.readAc.access   = fFRAT->l_ip.num_search_ports;
					fFRAT->stats_t.writeAc.access  = fFRAT->l_ip.num_wr_ports;
					fFRAT->tdp_stats = fFRAT->stats_t;
				}

				iRRAT->stats_t.readAc.access   = iRRAT->l_ip.num_rd_ports;
				iRRAT->stats_t.writeAc.access  = iRRAT->l_ip.num_wr_ports;
				iRRAT->tdp_stats = iRRAT->stats_t;

				fRRAT->stats_t.readAc.access   = fRRAT->l_ip.num_rd_ports;
				fRRAT->stats_t.writeAc.access  = fRRAT->l_ip.num_wr_ports;
				fRRAT->tdp_stats = fRRAT->stats_t;

				ifreeL->stats_t.readAc.access   = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;;
				ifreeL->stats_t.writeAc.access  = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
				ifreeL->tdp_stats = ifreeL->stats_t;

				ffreeL->stats_t.readAc.access   = coredynp.decodeW;//ffreeL->l_ip.num_rd_ports;
				ffreeL->stats_t.writeAc.access  = coredynp.decodeW;//ffreeL->l_ip.num_wr_ports;
				ffreeL->tdp_stats = ffreeL->stats_t;
			}
			else if (coredynp.scheu_ty==ReservationStation){
				if (coredynp.rm_ty ==RAMbased)
				{
					iFRAT->stats_t.readAc.access    = iFRAT->l_ip.num_rd_ports;
					iFRAT->stats_t.writeAc.access   = iFRAT->l_ip.num_wr_ports;
					iFRAT->stats_t.searchAc.access  = iFRAT->l_ip.num_search_ports;
					iFRAT->tdp_stats = iFRAT->stats_t;

					fFRAT->stats_t.readAc.access    = fFRAT->l_ip.num_rd_ports;
					fFRAT->stats_t.writeAc.access   = fFRAT->l_ip.num_wr_ports;
					fFRAT->stats_t.searchAc.access  = fFRAT->l_ip.num_search_ports;
					fFRAT->tdp_stats = fFRAT->stats_t;

				}
				else if (coredynp.rm_ty ==CAMbased)
				{
					iFRAT->stats_t.readAc.access   = iFRAT->l_ip.num_search_ports;
					iFRAT->stats_t.writeAc.access  = iFRAT->l_ip.num_wr_ports;
					iFRAT->tdp_stats = iFRAT->stats_t;

					fFRAT->stats_t.readAc.access   = fFRAT->l_ip.num_search_ports;
					fFRAT->stats_t.writeAc.access  = fFRAT->l_ip.num_wr_ports;
					fFRAT->tdp_stats = fFRAT->stats_t;
				}
				//Unified free list for both int and fp
				ifreeL->stats_t.readAc.access   = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;
				ifreeL->stats_t.writeAc.access  = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
				ifreeL->tdp_stats = ifreeL->stats_t;
			}
			idcl->stats_t.readAc.access = coredynp.decodeW;
			fdcl->stats_t.readAc.access = coredynp.decodeW;
			idcl->tdp_stats = idcl->stats_t;
			fdcl->tdp_stats = fdcl->stats_t;
		}
		else
		{
			if (coredynp.issueW>1)
			{
				idcl->stats_t.readAc.access = coredynp.decodeW;
				fdcl->stats_t.readAc.access = coredynp.decodeW;
				idcl->tdp_stats = idcl->stats_t;
				fdcl->tdp_stats = fdcl->stats_t;
			}
		}

	}
	else
	{//init stats for Runtime Dynamic (RTP)
		if (coredynp.core_ty==OOO){
			if (coredynp.scheu_ty==PhysicalRegFile)
			{
				if (coredynp.rm_ty ==RAMbased)
				{
					iFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads;
					iFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].rename_writes;
					iFRAT->rtp_stats = iFRAT->stats_t;

					fFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_reads;
					fFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].fp_rename_writes;
					fFRAT->rtp_stats = fFRAT->stats_t;
				}
				else if (coredynp.rm_ty ==CAMbased)
				{
					iFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads;
					iFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].rename_writes;
					iFRAT->rtp_stats = iFRAT->stats_t;

					fFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_reads;
					fFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].fp_rename_writes;
					fFRAT->rtp_stats = fFRAT->stats_t;
				}

				iRRAT->stats_t.readAc.access   = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16
				iRRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].rename_writes;
				iRRAT->rtp_stats = iRRAT->stats_t;

				fRRAT->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16
				fRRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].fp_rename_writes;
				fRRAT->rtp_stats = fRRAT->stats_t;

				ifreeL->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads;
				ifreeL->stats_t.writeAc.access  = 2*XML->sys.core[ithCore].rename_writes;
				ifreeL->rtp_stats = ifreeL->stats_t;

				ffreeL->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_reads;
				ffreeL->stats_t.writeAc.access  = 2*XML->sys.core[ithCore].fp_rename_writes;
				ffreeL->rtp_stats = ffreeL->stats_t;
			}
			else if (coredynp.scheu_ty==ReservationStation){
				if (coredynp.rm_ty ==RAMbased)
				{
					iFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads;
					iFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].rename_writes;
					iFRAT->stats_t.searchAc.access  = XML->sys.core[ithCore].committed_int_instructions;//hack: not all committed instructions use regs.
					iFRAT->rtp_stats = iFRAT->stats_t;

					fFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_reads;
					fFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].fp_rename_writes;
					fFRAT->stats_t.searchAc.access  = XML->sys.core[ithCore].committed_fp_instructions;
					fFRAT->rtp_stats = fFRAT->stats_t;
				}
				else if (coredynp.rm_ty ==CAMbased)
				{
					iFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads;
					iFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].rename_writes;
					iFRAT->rtp_stats = iFRAT->stats_t;

					fFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_reads;
					fFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].fp_rename_writes;
					fFRAT->rtp_stats = fFRAT->stats_t;
				}
				//Unified free list for both int and fp since the ROB acts as physical registers
				ifreeL->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads +
					XML->sys.core[ithCore].fp_rename_reads;
				ifreeL->stats_t.writeAc.access  = 2*(XML->sys.core[ithCore].rename_writes +
					XML->sys.core[ithCore].fp_rename_writes);//HACK: 2-> since some of renaming in the same group
															 //are terminated early
				ifreeL->rtp_stats = ifreeL->stats_t;
			}
			// NOTE(review): idcl scales with decodeW and rename_reads while
			// fdcl scales with fp_issueW and fp_rename_writes -- confirm the
			// asymmetry is intended.
			idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*XML->sys.core[ithCore].rename_reads;
			fdcl->stats_t.readAc.access = 3*coredynp.fp_issueW*coredynp.fp_issueW*XML->sys.core[ithCore].fp_rename_writes;
			idcl->rtp_stats = idcl->stats_t;
			fdcl->rtp_stats = fdcl->stats_t;
		}
		else
		{
			if (coredynp.issueW>1)
			{
				idcl->stats_t.readAc.access = 2*XML->sys.core[ithCore].int_instructions;
				fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions;
				idcl->rtp_stats = idcl->stats_t;
				fdcl->rtp_stats = fdcl->stats_t;
			}
		}

	}
	/* Compute engine */
	if (coredynp.core_ty==OOO)
	{
		if (coredynp.scheu_ty==PhysicalRegFile)
		{
			if (coredynp.rm_ty ==RAMbased)
			{
				iFRAT->power_t.reset();
				fFRAT->power_t.reset();

				// Each front-end RAT read is charged the RAT read plus the
				// dependency-check logic's dynamic value.
				iFRAT->power_t.readOp.dynamic  +=  (iFRAT->stats_t.readAc.access
						*(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
						+iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
				fFRAT->power_t.readOp.dynamic  +=  (fFRAT->stats_t.readAc.access
						*(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
						+fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
			}
			else if (coredynp.rm_ty ==CAMbased)
			{
				iFRAT->power_t.reset();
				fFRAT->power_t.reset();
				// CAM-based RAT reads are charged as searches.
				iFRAT->power_t.readOp.dynamic  +=  (iFRAT->stats_t.readAc.access
						*(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
						+iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
				fFRAT->power_t.readOp.dynamic  +=  (fFRAT->stats_t.readAc.access
						*(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
						+fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
			}

			iRRAT->power_t.reset();
			fRRAT->power_t.reset();
			ifreeL->power_t.reset();
			ffreeL->power_t.reset();

			iRRAT->power_t.readOp.dynamic  +=  (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic
					+iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic);
			fRRAT->power_t.readOp.dynamic  +=  (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic
					+fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic);
			ifreeL->power_t.readOp.dynamic  +=  (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
					+ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
			ffreeL->power_t.readOp.dynamic  +=  (ffreeL->stats_t.readAc.access*ffreeL->local_result.power.readOp.dynamic
					+ffreeL->stats_t.writeAc.access*ffreeL->local_result.power.writeOp.dynamic);

		}
		else if (coredynp.scheu_ty==ReservationStation)
		{
			if (coredynp.rm_ty ==RAMbased)
			{
				iFRAT->power_t.reset();
				fFRAT->power_t.reset();

				iFRAT->power_t.readOp.dynamic  +=  (iFRAT->stats_t.readAc.access
						*(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
						+iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic
						+iFRAT->stats_t.searchAc.access*iFRAT->local_result.power.searchOp.dynamic);
				fFRAT->power_t.readOp.dynamic  +=  (fFRAT->stats_t.readAc.access
						*(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
						+fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic
						+fFRAT->stats_t.searchAc.access*fFRAT->local_result.power.searchOp.dynamic);
			}
			else if (coredynp.rm_ty ==CAMbased)
			{
				iFRAT->power_t.reset();
				fFRAT->power_t.reset();
				iFRAT->power_t.readOp.dynamic  +=  (iFRAT->stats_t.readAc.access
						*(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
						+iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
				fFRAT->power_t.readOp.dynamic  +=  (fFRAT->stats_t.readAc.access
						*(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
						+fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
			}
			ifreeL->power_t.reset();
			ifreeL->power_t.readOp.dynamic  +=  (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
					+ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
		}

	}
	else
	{
		if (coredynp.issueW>1)
		{
			idcl->power_t.reset();
			fdcl->power_t.reset();
			set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access);
			idcl->power_t = idcl->power * pppm_t;
			// fdcl is scaled by its own access count in both the first and
			// last slot, mirroring the idcl call above (the last argument
			// previously used idcl's count by mistake).
			set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, fdcl->stats_t.readAc.access);
			fdcl->power_t = fdcl->power * pppm_t;
		}

	}

	//assign value to tdp and rtp
	// NOTE(review): on the OOO paths idcl->power_t / fdcl->power_t are added
	// below but are not assigned anywhere in this function for OOO cores --
	// confirm where they are expected to be set.
	if (is_tdp)
	{
		if (coredynp.core_ty==OOO)
		{
			if (coredynp.scheu_ty==PhysicalRegFile)
			{
				iFRAT->power   =  iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
				fFRAT->power   =  fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
				iRRAT->power   =  iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
				fRRAT->power   =  fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
				ifreeL->power  =  ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
				ffreeL->power  =  ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
				power	       =  power + (iFRAT->power + fFRAT->power)
				                 + (iRRAT->power + fRRAT->power)
				                 + (ifreeL->power + ffreeL->power);
			}
			else if (coredynp.scheu_ty==ReservationStation)
			{
				iFRAT->power   =  iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
				fFRAT->power   =  fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
				ifreeL->power  =  ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
				power	       =  power + (iFRAT->power + fFRAT->power)
				                 + ifreeL->power;
			}
		}
		else
		{
			power   =  power + idcl->power_t + fdcl->power_t;
		}

	}
	else
	{
		if (coredynp.core_ty==OOO)
		{
			if (coredynp.scheu_ty==PhysicalRegFile)
			{
				iFRAT->rt_power   =  iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
				fFRAT->rt_power   =  fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
				iRRAT->rt_power   =  iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
				fRRAT->rt_power   =  fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
				ifreeL->rt_power  =  ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
				ffreeL->rt_power  =  ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
				rt_power	      =  rt_power + (iFRAT->rt_power + fFRAT->rt_power)
				                   + (iRRAT->rt_power + fRRAT->rt_power)
				                   + (ifreeL->rt_power + ffreeL->rt_power);
			}
			else if (coredynp.scheu_ty==ReservationStation)
			{
				iFRAT->rt_power   =  iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
				fFRAT->rt_power   =  fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
				ifreeL->rt_power  =  ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
				rt_power	      =  rt_power + (iFRAT->rt_power + fFRAT->rt_power)
				                   + ifreeL->rt_power;
			}
		}
		else
		{
			rt_power   =  rt_power + idcl->power_t + fdcl->power_t;
		}

	}
}

/*
 * Prints the power/area report of the renaming unit to stdout.
 *
 * indent  - number of leading spaces for section titles; detail lines use indent+2.
 * plevel  - not read by this function.
 * is_tdp  - true:  one titled section per component (area, peak dynamic,
 *                  subthreshold leakage, gate leakage, runtime dynamic);
 *           false: a flat list of per-component figures taken from rt_power.
 *
 * OOO cores report the front-end RATs and the free list, plus the retire RATs
 * and the FP free list when the scheduler type is PhysicalRegFile. Any other
 * core type reports only the integer and FP DCL entries (idcl / fdcl).
 * Nothing is printed when the unit does not exist.
 */
void RENAMINGU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
	if (!exist) return;
	string indent_str(indent, ' ');
	string indent_str_next(indent+2, ' ');
	// When set, the longer-channel leakage figure is reported instead of the nominal one.
	bool long_channel = XML->sys.longer_channel_device;


	if (is_tdp)
	{
		// Peak dynamic is power.readOp.dynamic scaled by clockRate;
		// runtime dynamic is rt_power.readOp.dynamic divided by executionTime.

		if (coredynp.core_ty==OOO)
		{
			cout << indent_str<< "Int Front End RAT:" << endl;
			cout << indent_str_next << "Area = " << iFRAT->area.get_area()*1e-6<< " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << iFRAT->power.readOp.dynamic*clockRate << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? iFRAT->power.readOp.longer_channel_leakage:iFRAT->power.readOp.leakage) <<" W" << endl;
			cout << indent_str_next << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << iFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;
			cout << indent_str<< "FP Front End RAT:" << endl;
			cout << indent_str_next << "Area = " << fFRAT->area.get_area()*1e-6  << " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << fFRAT->power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? fFRAT->power.readOp.longer_channel_leakage:fFRAT->power.readOp.leakage)  << " W" << endl;
			cout << indent_str_next << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << fFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;
			cout << indent_str<<"Free List:" << endl;
			cout << indent_str_next << "Area = " << ifreeL->area.get_area()*1e-6  << " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << ifreeL->power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? ifreeL->power.readOp.longer_channel_leakage:ifreeL->power.readOp.leakage)  << " W" << endl;
			cout << indent_str_next << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << ifreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;

			// Retire RATs and the FP free list are only reported for the
			// PhysicalRegFile scheduler type.
			if (coredynp.scheu_ty==PhysicalRegFile)
			{
				cout << indent_str<< "Int Retire RAT: " << endl;
				cout << indent_str_next << "Area = " << iRRAT->area.get_area() *1e-6 << " mm^2" << endl;
				cout << indent_str_next << "Peak Dynamic = " << iRRAT->power.readOp.dynamic*clockRate  << " W" << endl;
				cout << indent_str_next << "Subthreshold Leakage = "
					<< (long_channel? iRRAT->power.readOp.longer_channel_leakage:iRRAT->power.readOp.leakage)  << " W" << endl;
				cout << indent_str_next << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage  << " W" << endl;
				cout << indent_str_next << "Runtime Dynamic = " << iRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
				cout <<endl;
				cout << indent_str<< "FP Retire RAT:" << endl;
				cout << indent_str_next << "Area = " << fRRAT->area.get_area()  *1e-6<< " mm^2" << endl;
				cout << indent_str_next << "Peak Dynamic = " << fRRAT->power.readOp.dynamic*clockRate  << " W" << endl;
				cout << indent_str_next << "Subthreshold Leakage = "
					<< (long_channel? fRRAT->power.readOp.longer_channel_leakage:fRRAT->power.readOp.leakage)  << " W" << endl;
				cout << indent_str_next << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage  << " W" << endl;
				cout << indent_str_next << "Runtime Dynamic = " << fRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
				cout <<endl;
				cout << indent_str<< "FP Free List:" << endl;
				cout << indent_str_next << "Area = " << ffreeL->area.get_area()*1e-6  << " mm^2" << endl;
				cout << indent_str_next << "Peak Dynamic = " << ffreeL->power.readOp.dynamic*clockRate  << " W" << endl;
				cout << indent_str_next << "Subthreshold Leakage = "
					<< (long_channel? ffreeL->power.readOp.longer_channel_leakage:ffreeL->power.readOp.leakage)  << " W" << endl;
				cout << indent_str_next << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage  << " W" << endl;
				cout << indent_str_next << "Runtime Dynamic = " << ffreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
				cout <<endl;
			}
		}
		else
		{
			// Non-OOO cores: only the DCL entries are reported (no area line).
			cout << indent_str<< "Int DCL:" << endl;
			cout << indent_str_next << "Peak Dynamic = " << idcl->power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? idcl->power.readOp.longer_channel_leakage:idcl->power.readOp.leakage)  << " W" << endl;
			cout << indent_str_next << "Gate Leakage = " << idcl->power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << idcl->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout << indent_str<<"FP DCL:" << endl;
			cout << indent_str_next << "Peak Dynamic = " << fdcl->power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? fdcl->power.readOp.longer_channel_leakage:fdcl->power.readOp.leakage)  << " W" << endl;
			cout << indent_str_next << "Gate Leakage = " << fdcl->power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << fdcl->rt_power.readOp.dynamic/executionTime << " W" << endl;
		}
	}
	else
	{
		// Flat listing: every figure is read from the component's own rt_power.
		if (coredynp.core_ty==OOO)
		{
			cout << indent_str_next << "Int Front End RAT    Peak Dynamic = " << iFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
			cout << indent_str_next << "Int Front End RAT    Subthreshold Leakage = " << iFRAT->rt_power.readOp.leakage <<" W" << endl;
			cout << indent_str_next << "Int Front End RAT    Gate Leakage = " << iFRAT->rt_power.readOp.gate_leakage << " W" << endl;
			cout << indent_str_next << "FP Front End RAT   Peak Dynamic = " << fFRAT->rt_power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "FP Front End RAT   Subthreshold Leakage = " << fFRAT->rt_power.readOp.leakage  << " W" << endl;
			cout << indent_str_next << "FP Front End RAT   Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Free List   Peak Dynamic = " << ifreeL->rt_power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Free List   Subthreshold Leakage = " << ifreeL->rt_power.readOp.leakage  << " W" << endl;
			// Gate leakage of the free list itself (previously printed the FP front-end RAT's value).
			cout << indent_str_next << "Free List   Gate Leakage = " << ifreeL->rt_power.readOp.gate_leakage  << " W" << endl;
			if (coredynp.scheu_ty==PhysicalRegFile)
			{
				cout << indent_str_next << "Int Retire RAT   Peak Dynamic = " << iRRAT->rt_power.readOp.dynamic*clockRate  << " W" << endl;
				cout << indent_str_next << "Int Retire RAT   Subthreshold Leakage = " << iRRAT->rt_power.readOp.leakage  << " W" << endl;
				cout << indent_str_next << "Int Retire RAT   Gate Leakage = " << iRRAT->rt_power.readOp.gate_leakage  << " W" << endl;
				cout << indent_str_next << "FP Retire RAT   Peak Dynamic = " << fRRAT->rt_power.readOp.dynamic*clockRate  << " W" << endl;
				cout << indent_str_next << "FP Retire RAT   Subthreshold Leakage = " << fRRAT->rt_power.readOp.leakage  << " W" << endl;
				cout << indent_str_next << "FP Retire RAT   Gate Leakage = " << fRRAT->rt_power.readOp.gate_leakage  << " W" << endl;
				cout << indent_str_next << "FP Free List   Peak Dynamic = " << ffreeL->rt_power.readOp.dynamic*clockRate  << " W" << endl;
				cout << indent_str_next << "FP Free List   Subthreshold Leakage = " << ffreeL->rt_power.readOp.leakage  << " W" << endl;
				// Gate leakage of the FP free list itself (previously printed the FP front-end RAT's value).
				cout << indent_str_next << "FP Free List   Gate Leakage = " << ffreeL->rt_power.readOp.gate_leakage  << " W" << endl;
			}
		}
		else
		{
			cout << indent_str_next << "Int DCL   Peak Dynamic = " << idcl->rt_power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Int DCL   Subthreshold Leakage = " << idcl->rt_power.readOp.leakage  << " W" << endl;
			cout << indent_str_next << "Int DCL   Gate Leakage = " << idcl->rt_power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "FP DCL   Peak Dynamic = " << fdcl->rt_power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "FP DCL   Subthreshold Leakage = " << fdcl->rt_power.readOp.leakage  << " W" << endl;
			cout << indent_str_next << "FP DCL   Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage  << " W" << endl;
		}
	}

}


void SchedulerU::computeEnergy(bool is_tdp)
{
	if (!exist) return;
	double ROB_duty_cycle;
//	ROB_duty_cycle = ((coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
//			+ coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 ? (coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
//					+ coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1;
	ROB_duty_cycle = 1;
	//init stats
	if (is_tdp)
	{
		if (coredynp.core_ty==OOO)
		{
			int_inst_window->stats_t.readAc.access    = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
			int_inst_window->stats_t.writeAc.access   = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
			int_inst_window->stats_t.searchAc.access  = coredynp.issueW*coredynp.num_pipelines;
			int_inst_window->tdp_stats                = int_inst_window->stats_t;
			fp_inst_window->stats_t.readAc.access     = fp_inst_window->l_ip.num_rd_ports*coredynp.num_fp_pipelines;
			fp_inst_window->stats_t.writeAc.access    = fp_inst_window->l_ip.num_wr_ports*coredynp.num_fp_pipelines;
			fp_inst_window->stats_t.searchAc.access   = fp_inst_window->l_ip.num_search_ports*coredynp.num_fp_pipelines;
			fp_inst_window->tdp_stats                 = fp_inst_window->stats_t;

			if (XML->sys.core[ithCore].ROB_size >0)
			{
				ROB->stats_t.readAc.access   = coredynp.commitW*coredynp.num_pipelines*ROB_duty_cycle;
				ROB->stats_t.writeAc.access  = coredynp.issueW*coredynp.num_pipelines*ROB_duty_cycle;
				ROB->tdp_stats        = ROB->stats_t;

				/*
				 * When inst commits, ROB must be read.
				 * Because for Physcial register based cores, physical register tag in ROB
				 * need to be read out and write into RRAT/CAM based RAT.
				 * For RS based cores, register content that stored in ROB must be
				 * read out and stored in architectural registers.
				 *
				 * if no-register is involved, the ROB read out operation when instruction commits can be ignored.
				 * assuming 20% insts. belong this type.
				 * TODO: ROB duty_cycle need to be revisited
				 */
			}

		}
		else if (coredynp.multithreaded)
		{
			int_inst_window->stats_t.readAc.access   = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
			int_inst_window->stats_t.writeAc.access  = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
			int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines;
			int_inst_window->tdp_stats       = int_inst_window->stats_t;
		}

     }
    else
    {//rtp
		if (coredynp.core_ty==OOO)
		{
			int_inst_window->stats_t.readAc.access   = XML->sys.core[ithCore].inst_window_reads;
			int_inst_window->stats_t.writeAc.access  = XML->sys.core[ithCore].inst_window_writes;
			int_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses;
			int_inst_window->rtp_stats               = int_inst_window->stats_t;
			fp_inst_window->stats_t.readAc.access    = XML->sys.core[ithCore].fp_inst_window_reads;
			fp_inst_window->stats_t.writeAc.access   = XML->sys.core[ithCore].fp_inst_window_writes;
			fp_inst_window->stats_t.searchAc.access  = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses;
			fp_inst_window->rtp_stats                = fp_inst_window->stats_t;

			if (XML->sys.core[ithCore].ROB_size >0)
			{

				ROB->stats_t.readAc.access   = XML->sys.core[ithCore].ROB_reads;
				ROB->stats_t.writeAc.access  = XML->sys.core[ithCore].ROB_writes;
				/* ROB need to be updated in RS based OOO when new values are produced,
				 * this update may happen before the commit stage when ROB entry is released
				 * 1. ROB write at instruction inserted in
				 * 2. ROB write as results produced (for RS based OOO only)
				 * 3. ROB read  as instruction committed. For RS based OOO, data values are read out and sent to ARF
				 * For Physical reg based OOO, no data stored in ROB, but register tags need to be
				 * read out and used to set the RRAT and to recycle the register tag to free list buffer
				 */
				ROB->rtp_stats        = ROB->stats_t;
			}

		}
		else if (coredynp.multithreaded)
		{
			int_inst_window->stats_t.readAc.access    = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
			int_inst_window->stats_t.writeAc.access   = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
			int_inst_window->stats_t.searchAc.access  = 2*(XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions);
			int_inst_window->rtp_stats                = int_inst_window->stats_t;
		}
    }

	//computation engine
	if (coredynp.core_ty==OOO)
	{
		int_inst_window->power_t.reset();
		fp_inst_window->power_t.reset();

		/* each instruction needs to write to scheduler, read out when all resources and source operands are ready
		 * two search ops with one for each source operand
		 *
		 */
		int_inst_window->power_t.readOp.dynamic  +=  int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
					+ int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
					+ int_inst_window->local_result.power.writeOp.dynamic  * int_inst_window->stats_t.writeAc.access
					+ int_inst_window->stats_t.readAc.access * instruction_selection->power.readOp.dynamic;

		fp_inst_window->power_t.readOp.dynamic   +=  fp_inst_window->local_result.power.readOp.dynamic * fp_inst_window->stats_t.readAc.access
					+ fp_inst_window->local_result.power.searchOp.dynamic * fp_inst_window->stats_t.searchAc.access
					+ fp_inst_window->local_result.power.writeOp.dynamic * fp_inst_window->stats_t.writeAc.access
					+ fp_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;

		if (XML->sys.core[ithCore].ROB_size >0)
		{
			ROB->power_t.reset();
			ROB->power_t.readOp.dynamic   +=  ROB->local_result.power.readOp.dynamic*ROB->stats_t.readAc.access +
						ROB->stats_t.writeAc.access*ROB->local_result.power.writeOp.dynamic;
		}




	}
	else if (coredynp.multithreaded)
	{
		int_inst_window->power_t.reset();
		int_inst_window->power_t.readOp.dynamic  +=  int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
						  + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
				          + int_inst_window->local_result.power.writeOp.dynamic  * int_inst_window->stats_t.writeAc.access
				          + int_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;
	}

	//assign values
	if (is_tdp)
	{
		if (coredynp.core_ty==OOO)
		{
			int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
			fp_inst_window->power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
			power	   = power + int_inst_window->power + fp_inst_window->power;
			if (XML->sys.core[ithCore].ROB_size >0)
			{
				ROB->power = ROB->power_t + ROB->local_result.power*pppm_lkg;
				power	   = power + ROB->power;
			}

		}
		else if (coredynp.multithreaded)
		{
			//			set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
			int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
			power	   = power + int_inst_window->power;
      	}

     }
    else
    {//rtp
		if (coredynp.core_ty==OOO)
		{
			int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
			fp_inst_window->rt_power  = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
			rt_power	              = rt_power + int_inst_window->rt_power + fp_inst_window->rt_power;
			if (XML->sys.core[ithCore].ROB_size >0)
			{
				ROB->rt_power = ROB->power_t + ROB->local_result.power*pppm_lkg;
				rt_power	              = rt_power + ROB->rt_power;
			}

		}
		else if (coredynp.multithreaded)
		{
			//			set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
			int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
			rt_power	              = rt_power + int_inst_window->rt_power;
      	}
    }
//	set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
//	cout<<"Scheduler power="<<power.readOp.dynamic<<"leakage="<<power.readOp.leakage<<endl;
//	cout<<"IW="<<int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.readAc.access +
//    + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access<<"leakage="<<int_inst_window->local_result.power.readOp.leakage<<endl;
//	cout<<"selection"<<instruction_selection->power.readOp.dynamic<<"leakage"<<instruction_selection->power.readOp.leakage<<endl;
}

void SchedulerU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
	if (!exist) return;
	string indent_str(indent, ' ');
	string indent_str_next(indent+2, ' ');
	bool long_channel = XML->sys.longer_channel_device;


	if (is_tdp)
	{
		if (coredynp.core_ty==OOO)
		{
			cout << indent_str << "Instruction Window:" << endl;
			cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
			cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;
			cout << indent_str << "FP Instruction Window:" << endl;
			cout << indent_str_next << "Area = " << fp_inst_window->area.get_area()*1e-6  << " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << fp_inst_window->power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? fp_inst_window->power.readOp.longer_channel_leakage:fp_inst_window->power.readOp.leakage ) << " W" << endl;
			cout << indent_str_next << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << fp_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;
			if (XML->sys.core[ithCore].ROB_size >0)
			{
				cout << indent_str<<"ROB:" << endl;
				cout << indent_str_next << "Area = " << ROB->area.get_area() *1e-6 << " mm^2" << endl;
				cout << indent_str_next << "Peak Dynamic = " << ROB->power.readOp.dynamic*clockRate  << " W" << endl;
				cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? ROB->power.readOp.longer_channel_leakage:ROB->power.readOp.leakage)  << " W" << endl;
				cout << indent_str_next << "Gate Leakage = " << ROB->power.readOp.gate_leakage  << " W" << endl;
				cout << indent_str_next << "Runtime Dynamic = " << ROB->rt_power.readOp.dynamic/executionTime << " W" << endl;
				cout <<endl;
			}
		}
		else if (coredynp.multithreaded)
		{
			cout << indent_str << "Instruction Window:" << endl;
			cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
			cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;
		}
	}
	else
	{
		if (coredynp.core_ty==OOO)
		{
			cout << indent_str_next << "Instruction Window    Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
			cout << indent_str_next << "Instruction Window    Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
			cout << indent_str_next << "Instruction Window    Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
			cout << indent_str_next << "FP Instruction Window   Peak Dynamic = " << fp_inst_window->rt_power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "FP Instruction Window   Subthreshold Leakage = " << fp_inst_window->rt_power.readOp.leakage  << " W" << endl;
			cout << indent_str_next << "FP Instruction Window   Gate Leakage = " << fp_inst_window->rt_power.readOp.gate_leakage  << " W" << endl;
			if (XML->sys.core[ithCore].ROB_size >0)
			{
				cout << indent_str_next << "ROB   Peak Dynamic = " << ROB->rt_power.readOp.dynamic*clockRate  << " W" << endl;
				cout << indent_str_next << "ROB   Subthreshold Leakage = " << ROB->rt_power.readOp.leakage  << " W" << endl;
				cout << indent_str_next << "ROB   Gate Leakage = " << ROB->rt_power.readOp.gate_leakage  << " W" << endl;
			}
		}
		else if (coredynp.multithreaded)
		{
			cout << indent_str_next << "Instruction Window    Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
			cout << indent_str_next << "Instruction Window    Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
			cout << indent_str_next << "Instruction Window    Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
		}
	}

}

void LoadStoreU::computeEnergy(bool is_tdp)
{
	if (!exist) return;

	executionTime=XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);//Syed

	//RF crossbar power (Syed)
	xbar_shared->compute_power();
   
	if (is_tdp)
	    {

	    	//init stats for Peak
        // added by Jingwen
	    	sharedmemory.caches->stats_t.readAc.access  = 0.67*sharedmemory.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
	    	sharedmemory.caches->stats_t.readAc.miss    = 0;
	    	sharedmemory.caches->stats_t.readAc.hit     = sharedmemory.caches->stats_t.readAc.access - sharedmemory.caches->stats_t.readAc.miss;
	    	sharedmemory.caches->stats_t.writeAc.access = 0.33*sharedmemory.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
	    	sharedmemory.caches->stats_t.writeAc.miss   = 0;
    		sharedmemory.caches->stats_t.writeAc.hit    = sharedmemory.caches->stats_t.writeAc.access -	sharedmemory.caches->stats_t.writeAc.miss;
	    	sharedmemory.caches->tdp_stats = sharedmemory.caches->stats_t;

	    	sharedmemory.missb->stats_t.readAc.access  = sharedmemory.missb->l_ip.num_search_ports;
	    	sharedmemory.missb->stats_t.writeAc.access = sharedmemory.missb->l_ip.num_search_ports;
	    	sharedmemory.missb->tdp_stats = sharedmemory.missb->stats_t;

	    	sharedmemory.ifb->stats_t.readAc.access  = sharedmemory.ifb->l_ip.num_search_ports;
	    	sharedmemory.ifb->stats_t.writeAc.access = sharedmemory.ifb->l_ip.num_search_ports;
	    	sharedmemory.ifb->tdp_stats = sharedmemory.ifb->stats_t;

	    	sharedmemory.prefetchb->stats_t.readAc.access  = sharedmemory.prefetchb->l_ip.num_search_ports;
	    	sharedmemory.prefetchb->stats_t.writeAc.access = sharedmemory.ifb->l_ip.num_search_ports;
	    	sharedmemory.prefetchb->tdp_stats = sharedmemory.prefetchb->stats_t;
	    	if (cache_p==Write_back)
	    	{
	    		sharedmemory.wbb->stats_t.readAc.access  = sharedmemory.wbb->l_ip.num_search_ports;
	    		sharedmemory.wbb->stats_t.writeAc.access = sharedmemory.wbb->l_ip.num_search_ports;
	    		sharedmemory.wbb->tdp_stats = sharedmemory.wbb->stats_t;
	    	}



	    	//init stats for Peak
	    	dcache.caches->stats_t.readAc.access  = 0.67*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
	    	dcache.caches->stats_t.readAc.miss    = 0;
	    	dcache.caches->stats_t.readAc.hit     = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
	    	dcache.caches->stats_t.writeAc.access = 0.33*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
	    	dcache.caches->stats_t.writeAc.miss   = 0;
    		dcache.caches->stats_t.writeAc.hit    = dcache.caches->stats_t.writeAc.access -	dcache.caches->stats_t.writeAc.miss;
	    	dcache.caches->tdp_stats = dcache.caches->stats_t;

	    	dcache.missb->stats_t.readAc.access  = dcache.missb->l_ip.num_search_ports;
	    	dcache.missb->stats_t.writeAc.access = dcache.missb->l_ip.num_search_ports;
	    	dcache.missb->tdp_stats = dcache.missb->stats_t;

	    	dcache.ifb->stats_t.readAc.access  = dcache.ifb->l_ip.num_search_ports;
	    	dcache.ifb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports;
	    	dcache.ifb->tdp_stats = dcache.ifb->stats_t;

	    	dcache.prefetchb->stats_t.readAc.access  = dcache.prefetchb->l_ip.num_search_ports;
	    	dcache.prefetchb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports;
	    	dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t;
	    	if (cache_p==Write_back)
	    	{
	    		dcache.wbb->stats_t.readAc.access  = dcache.wbb->l_ip.num_search_ports;
	    		dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports;
	    		dcache.wbb->tdp_stats = dcache.wbb->stats_t;
	    	}


	    	//init stats for Peak - ccache
	    	ccache.caches->stats_t.readAc.access  = 0.67*ccache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
	    	ccache.caches->stats_t.readAc.miss    = 0;
	    	ccache.caches->stats_t.readAc.hit     = ccache.caches->stats_t.readAc.access - ccache.caches->stats_t.readAc.miss;
	    	ccache.caches->stats_t.writeAc.access = 0.33*ccache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
	    	ccache.caches->stats_t.writeAc.miss   = 0;
    		ccache.caches->stats_t.writeAc.hit    = ccache.caches->stats_t.writeAc.access -	ccache.caches->stats_t.writeAc.miss;
	    	ccache.caches->tdp_stats = ccache.caches->stats_t;

	    	ccache.missb->stats_t.readAc.access  = ccache.missb->l_ip.num_search_ports;
	    	ccache.missb->stats_t.writeAc.access = ccache.missb->l_ip.num_search_ports;
	    	ccache.missb->tdp_stats = ccache.missb->stats_t;

	    	ccache.ifb->stats_t.readAc.access  = ccache.ifb->l_ip.num_search_ports;
	    	ccache.ifb->stats_t.writeAc.access = ccache.ifb->l_ip.num_search_ports;
	    	ccache.ifb->tdp_stats = ccache.ifb->stats_t;

	    	ccache.prefetchb->stats_t.readAc.access  = ccache.prefetchb->l_ip.num_search_ports;
	    	ccache.prefetchb->stats_t.writeAc.access = ccache.ifb->l_ip.num_search_ports;
	    	ccache.prefetchb->tdp_stats = ccache.prefetchb->stats_t;
	    	if (cache_p==Write_back)
	    	{
	    		ccache.wbb->stats_t.readAc.access  = ccache.wbb->l_ip.num_search_ports;
	    		ccache.wbb->stats_t.writeAc.access = ccache.wbb->l_ip.num_search_ports;
	    		ccache.wbb->tdp_stats = ccache.wbb->stats_t;
	    	}


	    	//init stats for Peak - tcache
	    	tcache.caches->stats_t.readAc.access  = 0.67*tcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
	    	tcache.caches->stats_t.readAc.miss    = 0;
	    	tcache.caches->stats_t.readAc.hit     = tcache.caches->stats_t.readAc.access - tcache.caches->stats_t.readAc.miss;
	    	tcache.caches->stats_t.writeAc.access = 0.33*tcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
	    	tcache.caches->stats_t.writeAc.miss   = 0;
    		tcache.caches->stats_t.writeAc.hit    = tcache.caches->stats_t.writeAc.access -	tcache.caches->stats_t.writeAc.miss;
	    	tcache.caches->tdp_stats = tcache.caches->stats_t;

	    	tcache.missb->stats_t.readAc.access  = tcache.missb->l_ip.num_search_ports;
	    	tcache.missb->stats_t.writeAc.access = tcache.missb->l_ip.num_search_ports;
	    	tcache.missb->tdp_stats = tcache.missb->stats_t;

	    	tcache.ifb->stats_t.readAc.access  = tcache.ifb->l_ip.num_search_ports;
	    	tcache.ifb->stats_t.writeAc.access = tcache.ifb->l_ip.num_search_ports;
	    	tcache.ifb->tdp_stats = tcache.ifb->stats_t;

	    	tcache.prefetchb->stats_t.readAc.access  = tcache.prefetchb->l_ip.num_search_ports;
	    	tcache.prefetchb->stats_t.writeAc.access = tcache.ifb->l_ip.num_search_ports;
	    	tcache.prefetchb->tdp_stats = tcache.prefetchb->stats_t;
	    	if (cache_p==Write_back)
	    	{
	    		tcache.wbb->stats_t.readAc.access  = tcache.wbb->l_ip.num_search_ports;
	    		tcache.wbb->stats_t.writeAc.access = tcache.wbb->l_ip.num_search_ports;
	    		tcache.wbb->tdp_stats = tcache.wbb->stats_t;
	    	}



	    	LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = LSQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
	    	LSQ->tdp_stats = LSQ->stats_t;
	    	if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
	    	{
	    		LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = LoadQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
	    		LoadQ->tdp_stats = LoadQ->stats_t;
	    	}
	    }
	    else
	    {
	    	//init stats for Runtime Dynamic (RTP)

	    	sharedmemory.caches->stats_t.readAc.access  = XML->sys.core[ithCore].sharedmemory.read_accesses;
	    	sharedmemory.caches->stats_t.readAc.miss    = XML->sys.core[ithCore].sharedmemory.read_misses;
	    	sharedmemory.caches->stats_t.readAc.hit     = sharedmemory.caches->stats_t.readAc.access - sharedmemory.caches->stats_t.readAc.miss;
	    	sharedmemory.caches->stats_t.writeAc.access = XML->sys.core[ithCore].sharedmemory.write_accesses;
	    	sharedmemory.caches->stats_t.writeAc.miss   = XML->sys.core[ithCore].sharedmemory.write_misses;
    		sharedmemory.caches->stats_t.writeAc.hit    = sharedmemory.caches->stats_t.writeAc.access -	sharedmemory.caches->stats_t.writeAc.miss;
	    	sharedmemory.caches->rtp_stats = sharedmemory.caches->stats_t;



	    	dcache.caches->stats_t.readAc.access  = XML->sys.core[ithCore].dcache.read_accesses;
	    	dcache.caches->stats_t.readAc.miss    = XML->sys.core[ithCore].dcache.read_misses;
	    	dcache.caches->stats_t.readAc.hit     = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
	    	dcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses;
	    	dcache.caches->stats_t.writeAc.miss   = XML->sys.core[ithCore].dcache.write_misses;
    		dcache.caches->stats_t.writeAc.hit    = dcache.caches->stats_t.writeAc.access -	dcache.caches->stats_t.writeAc.miss;
	    	dcache.caches->rtp_stats = dcache.caches->stats_t;

	    	ccache.caches->stats_t.readAc.access  = XML->sys.core[ithCore].ccache.read_accesses;
	    	ccache.caches->stats_t.readAc.miss    = XML->sys.core[ithCore].ccache.read_misses;
	    	ccache.caches->stats_t.readAc.hit     = ccache.caches->stats_t.readAc.access - ccache.caches->stats_t.readAc.miss;
	    	ccache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].ccache.write_accesses;
	    	ccache.caches->stats_t.writeAc.miss   = XML->sys.core[ithCore].ccache.write_misses;
    		ccache.caches->stats_t.writeAc.hit    = ccache.caches->stats_t.writeAc.access -	ccache.caches->stats_t.writeAc.miss;
	    	ccache.caches->rtp_stats = ccache.caches->stats_t;

	    	tcache.caches->stats_t.readAc.access  = XML->sys.core[ithCore].tcache.read_accesses;
	    	tcache.caches->stats_t.readAc.miss    = XML->sys.core[ithCore].tcache.read_misses;
	    	tcache.caches->stats_t.readAc.hit     = tcache.caches->stats_t.readAc.access - tcache.caches->stats_t.readAc.miss;
	    	tcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].tcache.write_accesses;
	    	tcache.caches->stats_t.writeAc.miss   = XML->sys.core[ithCore].tcache.write_misses;
    		tcache.caches->stats_t.writeAc.hit    = tcache.caches->stats_t.writeAc.access -	tcache.caches->stats_t.writeAc.miss;
	    	tcache.caches->rtp_stats = tcache.caches->stats_t;

	    	if (cache_p==Write_back)
	    	{

	    		sharedmemory.missb->stats_t.readAc.access  = sharedmemory.caches->stats_t.writeAc.miss;
	    		sharedmemory.missb->stats_t.writeAc.access = sharedmemory.caches->stats_t.writeAc.miss;
	    		sharedmemory.missb->rtp_stats = sharedmemory.missb->stats_t;
	    		sharedmemory.ifb->stats_t.readAc.access  = sharedmemory.caches->stats_t.writeAc.miss;
	    		sharedmemory.ifb->stats_t.writeAc.access = sharedmemory.caches->stats_t.writeAc.miss;
	    		sharedmemory.ifb->rtp_stats = sharedmemory.ifb->stats_t;
	    		sharedmemory.prefetchb->stats_t.readAc.access  = sharedmemory.caches->stats_t.writeAc.miss;
	    		sharedmemory.prefetchb->stats_t.writeAc.access = sharedmemory.caches->stats_t.writeAc.miss;
	    		sharedmemory.prefetchb->rtp_stats = sharedmemory.prefetchb->stats_t;
	    		sharedmemory.wbb->stats_t.readAc.access  = sharedmemory.caches->stats_t.writeAc.miss;
	    		sharedmemory.wbb->stats_t.writeAc.access = sharedmemory.caches->stats_t.writeAc.miss;
	    		sharedmemory.wbb->rtp_stats = sharedmemory.wbb->stats_t;


	    		dcache.missb->stats_t.readAc.access  = dcache.caches->stats_t.writeAc.miss;
	    		dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
	    		dcache.missb->rtp_stats = dcache.missb->stats_t;
	    		dcache.ifb->stats_t.readAc.access  = dcache.caches->stats_t.writeAc.miss;
	    		dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
	    		dcache.ifb->rtp_stats = dcache.ifb->stats_t;
	    		dcache.prefetchb->stats_t.readAc.access  = dcache.caches->stats_t.writeAc.miss;
	    		dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
	    		dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;
	    		dcache.wbb->stats_t.readAc.access  = dcache.caches->stats_t.writeAc.miss;
	    		dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
	    		dcache.wbb->rtp_stats = dcache.wbb->stats_t;

	    		ccache.missb->stats_t.readAc.access  = ccache.caches->stats_t.writeAc.miss;
	    		ccache.missb->stats_t.writeAc.access = ccache.caches->stats_t.writeAc.miss;
	    		ccache.missb->rtp_stats = ccache.missb->stats_t;
	    		ccache.ifb->stats_t.readAc.access  = ccache.caches->stats_t.writeAc.miss;
	    		ccache.ifb->stats_t.writeAc.access = ccache.caches->stats_t.writeAc.miss;
	    		ccache.ifb->rtp_stats = ccache.ifb->stats_t;
	    		ccache.prefetchb->stats_t.readAc.access  = ccache.caches->stats_t.writeAc.miss;
	    		ccache.prefetchb->stats_t.writeAc.access = ccache.caches->stats_t.writeAc.miss;
	    		ccache.prefetchb->rtp_stats = ccache.prefetchb->stats_t;
	    		ccache.wbb->stats_t.readAc.access  = ccache.caches->stats_t.writeAc.miss;
	    		ccache.wbb->stats_t.writeAc.access = ccache.caches->stats_t.writeAc.miss;
	    		ccache.wbb->rtp_stats = ccache.wbb->stats_t;

	    		tcache.missb->stats_t.readAc.access  = tcache.caches->stats_t.writeAc.miss;
	    		tcache.missb->stats_t.writeAc.access = tcache.caches->stats_t.writeAc.miss;
	    		tcache.missb->rtp_stats = tcache.missb->stats_t;
	    		tcache.ifb->stats_t.readAc.access  = tcache.caches->stats_t.writeAc.miss;
	    		tcache.ifb->stats_t.writeAc.access = tcache.caches->stats_t.writeAc.miss;
	    		tcache.ifb->rtp_stats = tcache.ifb->stats_t;
	    		tcache.prefetchb->stats_t.readAc.access  = tcache.caches->stats_t.writeAc.miss;
	    		tcache.prefetchb->stats_t.writeAc.access = tcache.caches->stats_t.writeAc.miss;
	    		tcache.prefetchb->rtp_stats = tcache.prefetchb->stats_t;
	    		tcache.wbb->stats_t.readAc.access  = tcache.caches->stats_t.writeAc.miss;
	    		tcache.wbb->stats_t.writeAc.access = tcache.caches->stats_t.writeAc.miss;
	    		tcache.wbb->rtp_stats = tcache.wbb->stats_t;
	    	}
	    	else
	    	{
	    		sharedmemory.missb->stats_t.readAc.access  = sharedmemory.caches->stats_t.readAc.miss;
	    		sharedmemory.missb->stats_t.writeAc.access = sharedmemory.caches->stats_t.readAc.miss;
	    		sharedmemory.missb->rtp_stats = sharedmemory.missb->stats_t;
	    		sharedmemory.ifb->stats_t.readAc.access  = sharedmemory.caches->stats_t.readAc.miss;
	    		sharedmemory.ifb->stats_t.writeAc.access = sharedmemory.caches->stats_t.readAc.miss;
	    		sharedmemory.ifb->rtp_stats = sharedmemory.ifb->stats_t;
	    		sharedmemory.prefetchb->stats_t.readAc.access  = sharedmemory.caches->stats_t.readAc.miss;
	    		sharedmemory.prefetchb->stats_t.writeAc.access = sharedmemory.caches->stats_t.readAc.miss;
	    		sharedmemory.prefetchb->rtp_stats = sharedmemory.prefetchb->stats_t;


	    		dcache.missb->stats_t.readAc.access  = dcache.caches->stats_t.readAc.miss;
	    		dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
	    		dcache.missb->rtp_stats = dcache.missb->stats_t;
	    		dcache.ifb->stats_t.readAc.access  = dcache.caches->stats_t.readAc.miss;
	    		dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
	    		dcache.ifb->rtp_stats = dcache.ifb->stats_t;
	    		dcache.prefetchb->stats_t.readAc.access  = dcache.caches->stats_t.readAc.miss;
	    		dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
	    		dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;


	    		ccache.missb->stats_t.readAc.access  = ccache.caches->stats_t.readAc.miss;
	    		ccache.missb->stats_t.writeAc.access = ccache.caches->stats_t.readAc.miss;
	    		ccache.missb->rtp_stats = ccache.missb->stats_t;
	    		ccache.ifb->stats_t.readAc.access  = ccache.caches->stats_t.readAc.miss;
	    		ccache.ifb->stats_t.writeAc.access = ccache.caches->stats_t.readAc.miss;
	    		ccache.ifb->rtp_stats = ccache.ifb->stats_t;
	    		ccache.prefetchb->stats_t.readAc.access  = ccache.caches->stats_t.readAc.miss;
	    		ccache.prefetchb->stats_t.writeAc.access = ccache.caches->stats_t.readAc.miss;
	    		ccache.prefetchb->rtp_stats = ccache.prefetchb->stats_t;

	    		tcache.missb->stats_t.readAc.access  = tcache.caches->stats_t.readAc.miss;
	    		tcache.missb->stats_t.writeAc.access = tcache.caches->stats_t.readAc.miss;
	    		tcache.missb->rtp_stats = tcache.missb->stats_t;
	    		tcache.ifb->stats_t.readAc.access  = tcache.caches->stats_t.readAc.miss;
	    		tcache.ifb->stats_t.writeAc.access = tcache.caches->stats_t.readAc.miss;
	    		tcache.ifb->rtp_stats = tcache.ifb->stats_t;
	    		tcache.prefetchb->stats_t.readAc.access  = tcache.caches->stats_t.readAc.miss;
	    		tcache.prefetchb->stats_t.writeAc.access = tcache.caches->stats_t.readAc.miss;
	    		tcache.prefetchb->rtp_stats = tcache.prefetchb->stats_t;

	    	}

	    	LSQ->stats_t.readAc.access  = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;//flush overhead considered
	    	LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;
	    	LSQ->rtp_stats = LSQ->stats_t;

	    	if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
	    	{
		    	LoadQ->stats_t.readAc.access  = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
		    	LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
		    	LoadQ->rtp_stats = LoadQ->stats_t;
	    	}

	    }

	sharedmemory.power_t.reset();
	dcache.power_t.reset();
	ccache.power_t.reset();
	tcache.power_t.reset();
	LSQ->power_t.reset();

    sharedmemory.power_t.readOp.dynamic	+= (sharedmemory.caches->stats_t.readAc.hit*sharedmemory.caches->local_result.power.readOp.dynamic+
    		sharedmemory.caches->stats_t.readAc.miss*sharedmemory.caches->local_result.power.readOp.dynamic+
    		sharedmemory.caches->stats_t.writeAc.miss*sharedmemory.caches->local_result.tag_array2->power.readOp.dynamic+
    		sharedmemory.caches->stats_t.writeAc.access*sharedmemory.caches->local_result.power.writeOp.dynamic +
			xbar_shared->power.readOp.dynamic*(sharedmemory.caches->stats_t.readAc.hit+ sharedmemory.caches->stats_t.writeAc.hit));


    dcache.power_t.readOp.dynamic	+= (dcache.caches->stats_t.readAc.hit*dcache.caches->local_result.power.readOp.dynamic+
    		dcache.caches->stats_t.readAc.miss*dcache.caches->local_result.power.readOp.dynamic+
    		dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.tag_array2->power.readOp.dynamic+
    		dcache.caches->stats_t.writeAc.access*dcache.caches->local_result.power.writeOp.dynamic +
			xbar_shared->power.readOp.dynamic*(dcache.caches->stats_t.readAc.hit+ dcache.caches->stats_t.writeAc.hit));
    ccache.power_t.readOp.dynamic	+= (ccache.caches->stats_t.readAc.hit*ccache.caches->local_result.power.readOp.dynamic+
    		ccache.caches->stats_t.readAc.miss*ccache.caches->local_result.power.readOp.dynamic+
    		ccache.caches->stats_t.writeAc.miss*ccache.caches->local_result.tag_array2->power.readOp.dynamic+
    		ccache.caches->stats_t.writeAc.access*ccache.caches->local_result.power.writeOp.dynamic + 
			xbar_shared->power.readOp.dynamic*(ccache.caches->stats_t.readAc.hit));

    tcache.power_t.readOp.dynamic	+= (tcache.caches->stats_t.readAc.hit*tcache.caches->local_result.power.readOp.dynamic+
    		tcache.caches->stats_t.readAc.miss*tcache.caches->local_result.power.readOp.dynamic+
    		tcache.caches->stats_t.writeAc.miss*tcache.caches->local_result.tag_array2->power.readOp.dynamic+
    		tcache.caches->stats_t.writeAc.access*tcache.caches->local_result.power.writeOp.dynamic+
			xbar_shared->power.readOp.dynamic*(tcache.caches->stats_t.readAc.hit+ tcache.caches->stats_t.writeAc.hit));

    if (cache_p==Write_back)
    {//write miss will generate a write later
    	dcache.power_t.readOp.dynamic	+= dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.power.writeOp.dynamic;
    	ccache.power_t.readOp.dynamic	+= ccache.caches->stats_t.writeAc.miss*ccache.caches->local_result.power.writeOp.dynamic;
    	tcache.power_t.readOp.dynamic	+= tcache.caches->stats_t.writeAc.miss*tcache.caches->local_result.power.writeOp.dynamic;
    	sharedmemory.power_t.readOp.dynamic	+= sharedmemory.caches->stats_t.writeAc.miss*sharedmemory.caches->local_result.power.writeOp.dynamic;
    }


    sharedmemory.power_t.readOp.dynamic	+=  sharedmemory.missb->stats_t.readAc.access*sharedmemory.missb->local_result.power.searchOp.dynamic +
            sharedmemory.missb->stats_t.writeAc.access*sharedmemory.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
    sharedmemory.power_t.readOp.dynamic	+=  sharedmemory.ifb->stats_t.readAc.access*sharedmemory.ifb->local_result.power.searchOp.dynamic +
            sharedmemory.ifb->stats_t.writeAc.access*sharedmemory.ifb->local_result.power.writeOp.dynamic;
    sharedmemory.power_t.readOp.dynamic	+=  sharedmemory.prefetchb->stats_t.readAc.access*sharedmemory.prefetchb->local_result.power.searchOp.dynamic +
            sharedmemory.prefetchb->stats_t.writeAc.access*sharedmemory.prefetchb->local_result.power.writeOp.dynamic;
    if (cache_p==Write_back)
    {
    	sharedmemory.power_t.readOp.dynamic	+=  sharedmemory.wbb->stats_t.readAc.access*sharedmemory.wbb->local_result.power.searchOp.dynamic
			+ sharedmemory.wbb->stats_t.writeAc.access*sharedmemory.wbb->local_result.power.writeOp.dynamic;
    }


    dcache.power_t.readOp.dynamic	+=  dcache.missb->stats_t.readAc.access*dcache.missb->local_result.power.searchOp.dynamic +
            dcache.missb->stats_t.writeAc.access*dcache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
    dcache.power_t.readOp.dynamic	+=  dcache.ifb->stats_t.readAc.access*dcache.ifb->local_result.power.searchOp.dynamic +
            dcache.ifb->stats_t.writeAc.access*dcache.ifb->local_result.power.writeOp.dynamic;
    dcache.power_t.readOp.dynamic	+=  dcache.prefetchb->stats_t.readAc.access*dcache.prefetchb->local_result.power.searchOp.dynamic +
            dcache.prefetchb->stats_t.writeAc.access*dcache.prefetchb->local_result.power.writeOp.dynamic;
    if (cache_p==Write_back)
    {
    	dcache.power_t.readOp.dynamic	+=  dcache.wbb->stats_t.readAc.access*dcache.wbb->local_result.power.searchOp.dynamic
			+ dcache.wbb->stats_t.writeAc.access*dcache.wbb->local_result.power.writeOp.dynamic;
    }

    ccache.power_t.readOp.dynamic	+=  ccache.missb->stats_t.readAc.access*ccache.missb->local_result.power.searchOp.dynamic +
            ccache.missb->stats_t.writeAc.access*ccache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
    ccache.power_t.readOp.dynamic	+=  ccache.ifb->stats_t.readAc.access*ccache.ifb->local_result.power.searchOp.dynamic +
            ccache.ifb->stats_t.writeAc.access*ccache.ifb->local_result.power.writeOp.dynamic;
    ccache.power_t.readOp.dynamic	+=  ccache.prefetchb->stats_t.readAc.access*ccache.prefetchb->local_result.power.searchOp.dynamic +
            ccache.prefetchb->stats_t.writeAc.access*ccache.prefetchb->local_result.power.writeOp.dynamic;
    if (cache_p==Write_back)
    {
    	ccache.power_t.readOp.dynamic	+=  ccache.wbb->stats_t.readAc.access*ccache.wbb->local_result.power.searchOp.dynamic
			+ ccache.wbb->stats_t.writeAc.access*ccache.wbb->local_result.power.writeOp.dynamic;
    }

    tcache.power_t.readOp.dynamic	+=  tcache.missb->stats_t.readAc.access*tcache.missb->local_result.power.searchOp.dynamic +
            tcache.missb->stats_t.writeAc.access*tcache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
    tcache.power_t.readOp.dynamic	+=  tcache.ifb->stats_t.readAc.access*tcache.ifb->local_result.power.searchOp.dynamic +
            tcache.ifb->stats_t.writeAc.access*tcache.ifb->local_result.power.writeOp.dynamic;
    tcache.power_t.readOp.dynamic	+=  tcache.prefetchb->stats_t.readAc.access*tcache.prefetchb->local_result.power.searchOp.dynamic +
            tcache.prefetchb->stats_t.writeAc.access*tcache.prefetchb->local_result.power.writeOp.dynamic;
    if (cache_p==Write_back)
    {
    	tcache.power_t.readOp.dynamic	+=  tcache.wbb->stats_t.readAc.access*tcache.wbb->local_result.power.searchOp.dynamic
			+ tcache.wbb->stats_t.writeAc.access*tcache.wbb->local_result.power.writeOp.dynamic;
    }


    if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
    {
    	LoadQ->power_t.reset();
    	LoadQ->power_t.readOp.dynamic  +=  LoadQ->stats_t.readAc.access*(LoadQ->local_result.power.searchOp.dynamic+ LoadQ->local_result.power.readOp.dynamic)+
    	        LoadQ->stats_t.writeAc.access*LoadQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LoadQ

    	LSQ->power_t.readOp.dynamic  +=  LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic)
    		        + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ

    }
    else
    {
    //	LSQ->power_t.readOp.dynamic  +=  LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic)
    	//	        + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ
		//	No LSQ in GPUs (Syed)

    }

    if (is_tdp)
    {
//    	dcache.power = dcache.power_t + (dcache.caches->local_result.power)*pppm_lkg +
//    			(dcache.missb->local_result.power +
//    			dcache.ifb->local_result.power +
//    			dcache.prefetchb->local_result.power +
//    			dcache.wbb->local_result.power)*pppm_Isub;


    	sharedmemory.power = sharedmemory.power_t + (sharedmemory.caches->local_result.power +
    			sharedmemory.missb->local_result.power +
    			sharedmemory.ifb->local_result.power +
    			sharedmemory.prefetchb->local_result.power + xbar_shared->power) *pppm_lkg;
    	if (cache_p==Write_back)
    	{
    		sharedmemory.power = sharedmemory.power + sharedmemory.wbb->local_result.power*pppm_lkg;
    	}


    	dcache.power = dcache.power_t + (dcache.caches->local_result.power +
    			dcache.missb->local_result.power +
    			dcache.ifb->local_result.power +
    			dcache.prefetchb->local_result.power) *pppm_lkg;
    	if (cache_p==Write_back)
    	{
    		dcache.power = dcache.power + dcache.wbb->local_result.power*pppm_lkg;
    	}

    	ccache.power = ccache.power_t + (ccache.caches->local_result.power +
    			ccache.missb->local_result.power +
    			ccache.ifb->local_result.power +
    			ccache.prefetchb->local_result.power) *pppm_lkg;
    	if (cache_p==Write_back)
    	{
    		ccache.power = ccache.power + ccache.wbb->local_result.power*pppm_lkg;
    	}

    	tcache.power = tcache.power_t + (tcache.caches->local_result.power +
    			tcache.missb->local_result.power +
    			tcache.ifb->local_result.power +
    			tcache.prefetchb->local_result.power) *pppm_lkg;
    	if (cache_p==Write_back)
    	{
    		tcache.power = tcache.power + tcache.wbb->local_result.power*pppm_lkg;
    	}


    	LSQ->power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
		//No LSQ in GPUs (Syed)
   	LSQ->power.reset();
    	power     = power + dcache.power + LSQ->power +sharedmemory.power + ccache.power + tcache.power;

    	if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
    	{
    		LoadQ->power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
    		power     = power + LoadQ->power;
    	}
    }
    else
    {
//    	dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power +
//    			dcache.missb->local_result.power +
//    			dcache.ifb->local_result.power +
//    			dcache.prefetchb->local_result.power +
//    			dcache.wbb->local_result.power)*pppm_lkg;
      rt_power.reset();
      sharedmemory.rt_power.reset(); //Jingwen
      tcache.rt_power.reset();
      ccache.rt_power.reset();
      dcache.rt_power.reset();
      LSQ->rt_power.reset();


    	sharedmemory.rt_power = sharedmemory.power_t + (sharedmemory.caches->local_result.power +
    			sharedmemory.missb->local_result.power +
    			sharedmemory.ifb->local_result.power +
    			sharedmemory.prefetchb->local_result.power )*pppm_lkg;

    	if (cache_p==Write_back)
    	{
    		sharedmemory.rt_power = sharedmemory.rt_power + sharedmemory.wbb->local_result.power*pppm_lkg;
    	}

    	dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power +
    			dcache.missb->local_result.power +
    			dcache.ifb->local_result.power +
    			dcache.prefetchb->local_result.power )*pppm_lkg;
    	if (cache_p==Write_back)
    	{
    		dcache.rt_power = dcache.rt_power + dcache.wbb->local_result.power*pppm_lkg;
    	}

    	ccache.rt_power = ccache.power_t + (ccache.caches->local_result.power +
    			ccache.missb->local_result.power +
    			ccache.ifb->local_result.power +
    			ccache.prefetchb->local_result.power )*pppm_lkg;
    	if (cache_p==Write_back)
    	{
    		ccache.rt_power = ccache.rt_power + ccache.wbb->local_result.power*pppm_lkg;
    	}

    	tcache.rt_power = tcache.power_t + (tcache.caches->local_result.power +
    			tcache.missb->local_result.power +
    			tcache.ifb->local_result.power +
    			tcache.prefetchb->local_result.power )*pppm_lkg;
    	if (cache_p==Write_back)
    	{
    		tcache.rt_power = tcache.rt_power + tcache.wbb->local_result.power*pppm_lkg;
    	}



    	LSQ->rt_power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
		LSQ->rt_power.reset();
    	rt_power     = rt_power + dcache.rt_power + LSQ->rt_power + sharedmemory.rt_power + ccache.rt_power + tcache.rt_power;

    	if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
    	{
    		LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
    		rt_power     = rt_power + LoadQ->rt_power;
    	}
    }
}


/*
 * Prints the load/store unit report (shared memory, data cache, constant
 * cache, texture cache and the load/store queues) to stdout.
 *
 * indent - number of spaces placed before each section heading; the fields
 *          below a heading are indented by two additional spaces.
 * plevel - not referenced by this function.
 * is_tdp - true:  print the full per-structure report (area, peak dynamic,
 *                 subthreshold/gate leakage, runtime dynamic);
 *          false: print a compact listing built from the rt_power values.
 *
 * Nothing is printed when the unit does not exist.
 */
void LoadStoreU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
	if (!exist) return;
	string indent_str(indent, ' ');
	string indent_str_next(indent+2, ' ');
	// Selects which subthreshold-leakage figure is reported in the full report.
	bool long_channel = XML->sys.longer_channel_device;


	if (is_tdp)
	{

		cout << indent_str << "Shared Memory:" << endl;
		cout << indent_str_next << "Area = " << sharedmemory.area.get_area()*1e-6<< " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << sharedmemory.power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? sharedmemory.power.readOp.longer_channel_leakage:sharedmemory.power.readOp.leakage )<<" W" << endl;
		cout << indent_str_next << "Gate Leakage = " << sharedmemory.power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << sharedmemory.rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;


		cout << indent_str << "Data Cache:" << endl;
		cout << indent_str_next << "Area = " << dcache.area.get_area()*1e-6<< " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << dcache.power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? dcache.power.readOp.longer_channel_leakage:dcache.power.readOp.leakage )<<" W" << endl;
		cout << indent_str_next << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << dcache.rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;

		cout << indent_str << "Constant Cache:" << endl;
		cout << indent_str_next << "Area = " << ccache.area.get_area()*1e-6<< " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << ccache.power.readOp.dynamic*clockRate << " W" << endl;
		// Report the constant cache's own leakage (previously the data
		// cache's figure was printed under this heading).
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? ccache.power.readOp.longer_channel_leakage:ccache.power.readOp.leakage )<<" W" << endl;
		cout << indent_str_next << "Gate Leakage = " << ccache.power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << ccache.rt_power.readOp.dynamic/executionTime << " W" << endl;
		// The constant cache section additionally prints the raw runtime
		// energy and the execution time used for the division above.
		cout << indent_str_next << "Runtime Dynamic Energy = " << ccache.rt_power.readOp.dynamic<<" J"<<endl;
		cout << indent_str_next << "Execution Time = " << executionTime<<" s"<<endl;
		cout <<endl;


		cout << indent_str << "Texture Cache:" << endl;
		cout << indent_str_next << "Area = " << tcache.area.get_area()*1e-6<< " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << tcache.power.readOp.dynamic*clockRate << " W" << endl;
		// Report the texture cache's own leakage (previously the data
		// cache's figure was printed under this heading).
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? tcache.power.readOp.longer_channel_leakage:tcache.power.readOp.leakage )<<" W" << endl;
		cout << indent_str_next << "Gate Leakage = " << tcache.power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << tcache.rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;

		if (coredynp.core_ty==Inorder)
		{
			// In-order cores report a single combined queue.
			cout << indent_str << "Load/Store Queue:" << endl;
			cout << indent_str_next << "Area = " << LSQ->area.get_area()*1e-6  << " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage)  << " W" << endl;
			cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;
		}
		else
		{
			// LoadQ is only reported when a load buffer is configured.
			if (XML->sys.core[ithCore].load_buffer_size >0)
			{
				cout << indent_str << "LoadQ:" << endl;
				cout << indent_str_next << "Area = " << LoadQ->area.get_area() *1e-6 << " mm^2" << endl;
				cout << indent_str_next << "Peak Dynamic = " << LoadQ->power.readOp.dynamic*clockRate  << " W" << endl;
				cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? LoadQ->power.readOp.longer_channel_leakage:LoadQ->power.readOp.leakage)  << " W" << endl;
				cout << indent_str_next << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage  << " W" << endl;
				cout << indent_str_next << "Runtime Dynamic = " << LoadQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
				cout <<endl;
			}
			// In this configuration the LSQ object is reported as the store queue.
			cout << indent_str<< "StoreQ:" << endl;
			cout << indent_str_next << "Area = " << LSQ->area.get_area()  *1e-6<< " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage)  << " W" << endl;
			cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime<< " W" << endl;
			cout <<endl;
		}
	}
	else
	{
		// Compact listing: every value below comes from rt_power.

		cout << indent_str_next << "Shared Memory    Peak Dynamic = " << sharedmemory.rt_power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Shared Memory    Subthreshold Leakage = " << sharedmemory.rt_power.readOp.leakage <<" W" << endl;
		cout << indent_str_next << "Shared Memory    Gate Leakage = " << sharedmemory.rt_power.readOp.gate_leakage << " W" << endl;


		cout << indent_str_next << "Data Cache    Peak Dynamic = " << dcache.rt_power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Data Cache    Subthreshold Leakage = " << dcache.rt_power.readOp.leakage <<" W" << endl;
		cout << indent_str_next << "Data Cache    Gate Leakage = " << dcache.rt_power.readOp.gate_leakage << " W" << endl;

		cout << indent_str_next << "Constant Cache    Peak Dynamic = " << ccache.rt_power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Constant Cache    Subthreshold Leakage = " << ccache.rt_power.readOp.leakage <<" W" << endl;
		cout << indent_str_next << "Constant Cache    Gate Leakage = " << ccache.rt_power.readOp.gate_leakage << " W" << endl;

		cout << indent_str_next << "Texture Cache    Peak Dynamic = " << tcache.rt_power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Texture Cache    Subthreshold Leakage = " << tcache.rt_power.readOp.leakage <<" W" << endl;
		cout << indent_str_next << "Texture Cache    Gate Leakage = " << tcache.rt_power.readOp.gate_leakage << " W" << endl;

		if (coredynp.core_ty==Inorder)
		{
			cout << indent_str_next << "Load/Store Queue   Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Load/Store Queue   Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage  << " W" << endl;
			cout << indent_str_next << "Load/Store Queue   Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage  << " W" << endl;
		}
		else
		{
			// Apply the same load-buffer guard as the full report so LoadQ
			// is never dereferenced when no load buffer is configured.
			if (XML->sys.core[ithCore].load_buffer_size >0)
			{
				cout << indent_str_next << "LoadQ   Peak Dynamic = " << LoadQ->rt_power.readOp.dynamic*clockRate  << " W" << endl;
				cout << indent_str_next << "LoadQ   Subthreshold Leakage = " << LoadQ->rt_power.readOp.leakage  << " W" << endl;
				cout << indent_str_next << "LoadQ   Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage  << " W" << endl;
			}
			cout << indent_str_next << "StoreQ   Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "StoreQ   Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage  << " W" << endl;
			cout << indent_str_next << "StoreQ   Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage  << " W" << endl;
		}
	}

}

/*
 * Computes the energy of the instruction and data TLBs.
 *
 * is_tdp - true:  activity is derived from the number of search ports (the
 *                 DTLB figure is additionally scaled by the LSU duty cycle),
 *                 with zero misses; results are stored in each TLB's `power`
 *                 and added to this unit's `power`.
 *          false: activity is read from the per-core XML statistics; results
 *                 are stored in each TLB's `rt_power` and added to this
 *                 unit's `rt_power`.
 *
 * Does nothing when the unit does not exist.
 */
void MemManU::computeEnergy(bool is_tdp)
{
	if (!exist)
	{
		return;
	}

	// ---- Step 1: populate the access statistics for the selected mode ----
	if (!is_tdp)
	{
		// Runtime dynamic (RTP): counts come from the XML input.
		itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses;
		itlb->stats_t.readAc.miss   = XML->sys.core[ithCore].itlb.total_misses;
		itlb->stats_t.readAc.hit    = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss;
		itlb->rtp_stats             = itlb->stats_t;

		dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses;
		dtlb->stats_t.readAc.miss   = XML->sys.core[ithCore].dtlb.total_misses;
		dtlb->stats_t.readAc.hit    = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss;
		dtlb->rtp_stats             = dtlb->stats_t;
	}
	else
	{
		// Peak: one access per search port and no misses.
		itlb->stats_t.readAc.access = itlb->l_ip.num_search_ports;
		itlb->stats_t.readAc.miss   = 0;
		itlb->stats_t.readAc.hit    = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss;
		itlb->tdp_stats             = itlb->stats_t;

		dtlb->stats_t.readAc.access = dtlb->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
		dtlb->stats_t.readAc.miss   = 0;
		dtlb->stats_t.readAc.hit    = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss;
		dtlb->tdp_stats             = dtlb->stats_t;
	}

	// ---- Step 2: dynamic energy = searches on every access + a write per miss ----
	// Total accesses (not hits) are used for the search term; the original
	// author notes that a fully-associative structure spends most of its
	// power in the tag.
	itlb->power_t.reset();
	dtlb->power_t.reset();

	itlb->power_t.readOp.dynamic +=
			itlb->stats_t.readAc.access*itlb->local_result.power.searchOp.dynamic
			+ itlb->stats_t.readAc.miss*itlb->local_result.power.writeOp.dynamic;

	dtlb->power_t.readOp.dynamic +=
			dtlb->stats_t.readAc.access*dtlb->local_result.power.searchOp.dynamic
			+ dtlb->stats_t.readAc.miss*dtlb->local_result.power.writeOp.dynamic;

	// ---- Step 3: add the pppm_lkg-scaled local result and accumulate ----
	if (!is_tdp)
	{
		itlb->rt_power = itlb->power_t + itlb->local_result.power *pppm_lkg;
		dtlb->rt_power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
		rt_power       = rt_power + itlb->rt_power + dtlb->rt_power;
	}
	else
	{
		itlb->power = itlb->power_t + itlb->local_result.power *pppm_lkg;
		dtlb->power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
		power       = power + itlb->power + dtlb->power;
	}
}

void MemManU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
	if (!exist) return;
	string indent_str(indent, ' ');
	string indent_str_next(indent+2, ' ');
	bool long_channel = XML->sys.longer_channel_device;




	if (is_tdp)
	{
		cout << indent_str << "Itlb:" << endl;
		cout << indent_str_next << "Area = " << itlb->area.get_area()*1e-6<< " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << itlb->power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? itlb->power.readOp.longer_channel_leakage:itlb->power.readOp.leakage) <<" W" << endl;
		cout << indent_str_next << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << itlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;
		cout << indent_str<< "Dtlb:" << endl;
		cout << indent_str_next << "Area = " << dtlb->area.get_area()*1e-6  << " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << dtlb->power.readOp.dynamic*clockRate  << " W" << endl;
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? dtlb->power.readOp.longer_channel_leakage:dtlb->power.readOp.leakage)  << " W" << endl;
		cout << indent_str_next << "Gate Leakage = " << dtlb->power.readOp.gate_leakage  << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << dtlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;
	}
	else
	{
		cout << indent_str_next << "Itlb    Peak Dynamic = " << itlb->rt_power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Itlb    Subthreshold Leakage = " << itlb->rt_power.readOp.leakage <<" W" << endl;
		cout << indent_str_next << "Itlb    Gate Leakage = " << itlb->rt_power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Dtlb   Peak Dynamic = " << dtlb->rt_power.readOp.dynamic*clockRate  << " W" << endl;
		cout << indent_str_next << "Dtlb   Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage  << " W" << endl;
		cout << indent_str_next << "Dtlb   Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage  << " W" << endl;
	}

}

/*
 * RegFU::computeEnergy
 *
 * Evaluates the register-file unit: register banks (IRF), operand
 * collectors (OPC), the optional register windows (RFWIN), and the RF
 * crossbar / arbiter.  Results are accumulated into this component's
 * `power` when is_tdp is true and into `rt_power` otherwise.
 *
 * Architecture RF and physical RF cannot be present at the same time.
 * Therefore, the RF stats can only refer to either ARF or PRF, and the
 * same stats can be used for both.
 *
 * Param is_tdp: true  -> use fixed peak access counts and update `power`;
 *               false -> use the access counters from the XML statistics
 *                        and update `rt_power`.
 *
 * Does nothing when the unit does not exist.
 */
void RegFU::computeEnergy(bool is_tdp)
{
	if (!exist) return;

	// total_cycles / (clock rate * 1e6); presumably the clock rate is in MHz,
	// which would make this seconds -- TODO confirm against the XML schema.
	executionTime=XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);//Syed

	//RF crossbar power (Syed Gilani)
	xbar_rfu->compute_power();

	//Arbiter power
	arbiter_rfu->compute_power();

	if (is_tdp)
	{
		//RF power -- modified by Syed
		//init stats for Peak
		// An earlier pair of stores (4 reads / 4 writes) was immediately
		// overwritten by the values below, so only the effective ones remain.
		IRF->stats_t.readAc.access  = 2;
		IRF->stats_t.writeAc.access = 1;
		IRF->tdp_stats = IRF->stats_t;

		OPC->stats_t.readAc.access  = 32;
		OPC->stats_t.writeAc.access = 32;
		OPC->tdp_stats = OPC->stats_t;

		//GPUs have a single RF which we model by IRF (Syed); FRF gets no peak stats.
		if (coredynp.regWindowing)
		{
			RFWIN->stats_t.readAc.access  = 0;//0.5*RFWIN->l_ip.num_rw_ports;
			RFWIN->stats_t.writeAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports;
			RFWIN->tdp_stats = RFWIN->stats_t;
		}
	}
	else
	{
		//init stats for Runtime Dynamic (RTP)
		//in Tesla each RF operand accesses 2 banks, so multiply accesses by 2
		// (read and write energies of reg_file are per bank)(Tesla) : Syed
		// Also, for a SIMD width of 8 and warp size of 32 threads, 4 accesses
		// (each accessing 2 banks) need to be performed per operand.
		// The same scaling was applied for every XML->sys.architecture value,
		// so no architecture test is needed here.
		IRF->stats_t.readAc.access  = (XML->sys.core[ithCore].int_regfile_reads/32)*(4*2);
		IRF->stats_t.writeAc.access = (XML->sys.core[ithCore].int_regfile_writes/32)*(4*2);
		IRF->rtp_stats = IRF->stats_t;

		// Operand collectors are read once per RF operand plus once per non-RF operand.
		OPC->stats_t.readAc.access  = (XML->sys.core[ithCore].int_regfile_reads)+XML->sys.core[ithCore].non_rf_operands;
		OPC->stats_t.writeAc.access = 0;
		OPC->rtp_stats = OPC->stats_t;

		if (coredynp.regWindowing)
		{
			RFWIN->stats_t.readAc.access  = XML->sys.core[ithCore].function_calls*16;
			RFWIN->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls*16;
			RFWIN->rtp_stats = RFWIN->stats_t;

			// NOTE(review): this replaces the IRF counts set above with the
			// unscaled read/write counters plus the window traffic -- confirm
			// that dropping the /32*(4*2) scaling is intended on this path.
			IRF->stats_t.readAc.access  = XML->sys.core[ithCore].int_regfile_reads +
			     XML->sys.core[ithCore].function_calls*16;
			IRF->stats_t.writeAc.access  = XML->sys.core[ithCore].int_regfile_writes +
			     XML->sys.core[ithCore].function_calls*16;
			IRF->rtp_stats = IRF->stats_t;
		}
	}

	IRF->power_t.reset();
	FRF->power_t.reset();
	OPC->power_t.reset();

	// Dynamic energy = access count * per-access energy.
	IRF->power_t.readOp.dynamic  =  (IRF->stats_t.readAc.access*IRF->local_result.power.readOp.dynamic
			+IRF->stats_t.writeAc.access*IRF->local_result.power.writeOp.dynamic);
	OPC->power_t.readOp.dynamic  =  (OPC->stats_t.readAc.access*OPC->local_result.power.readOp.dynamic);

	if (coredynp.regWindowing)
	{
		RFWIN->power_t.reset();
		RFWIN->power_t.readOp.dynamic   +=  (RFWIN->stats_t.readAc.access*RFWIN->local_result.power.readOp.dynamic +
				RFWIN->stats_t.writeAc.access*RFWIN->local_result.power.writeOp.dynamic);
	}

	if (is_tdp)
	{
		//Syed: removed the multiplication of power by hardware threads
		//since the one IRF is shared by all threads in GPUs

		// Mask applied to the operand-collector result: first entry 0, the
		// next two set to the collector-unit count (last entry left to
		// set_pppm's default).
		double pppm_lkg_banks[4];
		set_pppm(pppm_lkg_banks, 0,XML->sys.core[ithCore].collector_units,XML->sys.core[ithCore].collector_units );

		IRF->power = (IRF->power_t) + IRF->local_result.power*pppm_lkg;
		// Dynamic part comes from the access-weighted value only.
		IRF->power.readOp.dynamic = IRF->power_t.readOp.dynamic;
		OPC->power = (OPC->power_t) + OPC->local_result.power *pppm_lkg_banks;
		OPC->power.readOp.dynamic = OPC->power_t.readOp.dynamic;

		power	    =  power + (IRF->power+OPC->power);

		if (coredynp.regWindowing)
		{
			RFWIN->power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
			power        = power + RFWIN->power;
		}
	}
	else
	{
		//Removed *coredynp.pppm_lkg_multhread since all hardware threads share the same IRF
		IRF->rt_power = IRF->power_t + IRF->local_result.power*pppm_lkg;
		OPC->rt_power = OPC->power_t + OPC->local_result.power*pppm_lkg;

		//Each warp operand accesses the crossbar (and the arbiter); the same
		//count was used for every XML->sys.architecture value.
		const double warp_operand_accesses =
			((XML->sys.core[ithCore].int_regfile_reads/(32))+(XML->sys.core[ithCore].non_rf_operands/(32)));
		xbar_rfu->rt_power.readOp.dynamic    = warp_operand_accesses*xbar_rfu->power.readOp.dynamic;
		arbiter_rfu->rt_power.readOp.dynamic = warp_operand_accesses*arbiter_rfu->power.readOp.dynamic;

		// Only the access-weighted (power_t) parts of IRF/OPC enter the total here.
		rt_power	   =  rt_power + (IRF->power_t /*+ FRF->power_t*/+xbar_rfu->rt_power+arbiter_rfu->rt_power+OPC->power_t);
		if (coredynp.regWindowing)
		{
			RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
			rt_power        = rt_power + RFWIN->rt_power;
		}
	}
}


void RegFU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
	if (!exist) return;
	string indent_str(indent, ' ');
	string indent_str_next(indent+2, ' ');
	bool long_channel = XML->sys.longer_channel_device;

	if (is_tdp)
	{	cout << indent_str << "Register file banks: " << endl;
		cout << indent_str_next << "Area = " << IRF->area.get_area()*1e-6<< " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << IRF->power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? IRF->power.readOp.longer_channel_leakage:IRF->power.readOp.leakage) <<" W" << endl;

		cout << indent_str_next << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;
      cout << indent_str << "Crossbar (Integer RF):" << endl;
		cout << indent_str_next << "Area = " << xbar_rfu->area.get_area()*1e-6<< " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << xbar_rfu->power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? xbar_rfu->power.readOp.longer_channel_leakage:xbar_rfu->power.readOp.leakage) <<" W" << endl;

		cout << indent_str_next << "Gate Leakage = " << xbar_rfu->power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << xbar_rfu->rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;
      
		cout << indent_str << "Arbiter (Integer RF):" << endl;
		cout << indent_str_next << "Area = " << arbiter_rfu->area.get_area()*1e-6<< " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << arbiter_rfu->power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? arbiter_rfu->power.readOp.longer_channel_leakage:arbiter_rfu->power.readOp.leakage) <<" W" << endl;

		cout << indent_str_next << "Gate Leakage = " << arbiter_rfu->power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << arbiter_rfu->rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;

		/*
		cout << indent_str<< "Floating Point RF:" << endl;
		cout << indent_str_next << "Area = " << FRF->area.get_area()*1e-6  << " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << FRF->power.readOp.dynamic*clockRate  << " W" << endl;
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? FRF->power.readOp.longer_channel_leakage:FRF->power.readOp.leakage)  << " W" << endl;
		cout << indent_str_next << "Gate Leakage = " << FRF->power.readOp.gate_leakage  << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;
		*/
		if (coredynp.regWindowing)
		{
			cout << indent_str << "Register Windows:" << endl;
			cout << indent_str_next << "Area = " << RFWIN->area.get_area() *1e-6 << " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << RFWIN->power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? RFWIN->power.readOp.longer_channel_leakage:RFWIN->power.readOp.leakage)  << " W" << endl;
			cout << indent_str_next << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << RFWIN->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;
		}
	}
	else
	{
		cout << indent_str_next << "Integer RF    Peak Dynamic = " << IRF->rt_power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Integer RF    Subthreshold Leakage = " << IRF->rt_power.readOp.leakage <<" W" << endl;
		cout << indent_str_next << "Integer RF    Gate Leakage = " << IRF->rt_power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Floating Point RF   Peak Dynamic = " << FRF->rt_power.readOp.dynamic*clockRate  << " W" << endl;
		cout << indent_str_next << "Floating Point RF   Subthreshold Leakage = " << FRF->rt_power.readOp.leakage  << " W" << endl;
		cout << indent_str_next << "Floating Point RF   Gate Leakage = " << FRF->rt_power.readOp.gate_leakage  << " W" << endl;
		if (coredynp.regWindowing)
		{
			cout << indent_str_next << "Register Windows   Peak Dynamic = " << RFWIN->rt_power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Register Windows   Subthreshold Leakage = " << RFWIN->rt_power.readOp.leakage  << " W" << endl;
			cout << indent_str_next << "Register Windows   Gate Leakage = " << RFWIN->rt_power.readOp.gate_leakage  << " W" << endl;
		}
	}
}


/*
 * EXECU::computeEnergy
 *
 * Runs computeEnergy on the execution-unit children (register files,
 * scheduler, ALUs and, when configured, FPUs and multipliers) and folds
 * their results into this component's `power` (is_tdp == true) or
 * `rt_power` (is_tdp == false).
 *
 * The register-file and ALU contributions are multiplied by a mask whose
 * first entry is rf_fu_clockRate/clockRate; on the TDP path the FPU and
 * multiplier contributions are scaled the same way.
 *
 * Does nothing when the unit does not exist.
 */
void EXECU::computeEnergy(bool is_tdp)
{
	if (!exist) return;

	//Syed
	// Mask for the result-bus (CDB) terms.  The statements that would consume
	// it are disabled ("No conventional bypassing in GPU"), so within this
	// function it is only written.
	double cdb_mask[4]        = {1,1,1,1};
	// Scales the first mask entry by the RF/FU-to-core clock ratio.
	double clk_ratio_mask[4]  = {rf_fu_clockRate/clockRate,1,1,1};
	executionTime=XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);//Syed

	// Only the runtime accumulators are cleared; `power` of the children is left alone.
	rfu->rt_power.reset();
	scheu->rt_power.reset();
	exeu->rt_power.reset();

	rfu->computeEnergy(is_tdp);
	scheu->computeEnergy(is_tdp);
	exeu->computeEnergy(is_tdp);

	if (coredynp.num_fpus >0)
	{
		fp_u->rt_power.reset();
		fp_u->computeEnergy(is_tdp);
	}
	if (coredynp.num_muls >0)
	{
		mul->rt_power.reset();
		mul->computeEnergy(is_tdp);
	}
	bypass.rt_power.reset();

	if (!is_tdp)
	{
		// Runtime path: access counts come from the XML statistics.
		set_pppm(cdb_mask, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, XML->sys.core[ithCore].cdb_alu_accesses);

		if (coredynp.num_muls >0)
		{
			//2 means two source operands needs to be passed for each int instruction.
			set_pppm(cdb_mask, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, XML->sys.core[ithCore].cdb_mul_accesses);
			rt_power = rt_power + mul->rt_power;
		}

		if (coredynp.num_fpus>0)
		{
			set_pppm(cdb_mask, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses);
			rt_power = rt_power + fp_u->rt_power;
		}

		//No conventional bypassing in GPU (Syed): bypass.rt_power is not added.
		rt_power = rt_power + rfu->rt_power*clk_ratio_mask + exeu->rt_power*clk_ratio_mask + scheu->rt_power;
		return;
	}

	// TDP path: duty cycles stand in for access counts.
	//2 means two source operands needs to be passed for each int instruction.
	set_pppm(cdb_mask, 2*coredynp.ALU_cdb_duty_cycle, 2, 2, 2*coredynp.ALU_cdb_duty_cycle);

	if (coredynp.num_muls >0)
	{
		//2 means two source operands needs to be passed for each int instruction.
		set_pppm(cdb_mask, 2*coredynp.MUL_cdb_duty_cycle, 2, 2, 2*coredynp.MUL_cdb_duty_cycle);
		//No conventional bypassing in GPU (Syed)
		power = power + mul->power*clk_ratio_mask;
	}

	if (coredynp.num_fpus>0)
	{
		//3 means three source operands needs to be passed for each fp instruction.
		set_pppm(cdb_mask, 3*coredynp.FPU_cdb_duty_cycle, 3, 3, 3*coredynp.FPU_cdb_duty_cycle);
		//No conventional bypassing in GPU (Syed)
		power = power + fp_u->power*clk_ratio_mask;
	}

	//No conventional bypassing in GPU (Syed): bypass.power is not added.
	power = power + rfu->power*clk_ratio_mask + exeu->power*clk_ratio_mask + scheu->power;
}

/*
 * EXECU::displayEnergy
 *
 * Writes the execution-unit report to stdout.
 *
 * indent : number of spaces in front of section headings; entries below a
 *          heading are indented two further spaces.
 * plevel : detail level; values above 3 additionally print the breakdown
 *          of the register files and the instruction scheduler.
 * is_tdp : true  -> full report (register files, scheduler, functional
 *                   units, results broadcast bus);
 *          false -> compact listing built from the rt_power fields.
 *
 * Prints nothing when the unit does not exist.
 */
void EXECU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
	if (!exist) return;
	string indent_str(indent, ' ');
	string indent_str_next(indent+2, ' ');
	// Selects which subthreshold-leakage figure is reported.
	bool long_channel = XML->sys.longer_channel_device;

	if (is_tdp)
	{
		cout << indent_str << "Register Files:" << endl;
		cout << indent_str_next << "Area = " << rfu->area.get_area()*1e-6<< " mm^2" << endl;
		// The register files are reported at rf_fu_clockRate rather than clockRate.
		cout << indent_str_next << "Peak Dynamic = " << rfu->power.readOp.dynamic*rf_fu_clockRate << " W" << endl;
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? rfu->power.readOp.longer_channel_leakage:rfu->power.readOp.leakage) <<" W" << endl;
		cout << indent_str_next << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;
		if (plevel>3){
			// RegFU::displayEnergy takes (indent, plevel, is_tdp).  Passing only
			// (indent, is_tdp) put the bool into the plevel slot and left is_tdp
			// to its declared default, so all three arguments are passed.
			rfu->displayEnergy(indent+4,plevel,is_tdp);
		}
		cout << indent_str << "Instruction Scheduler:" << endl;
		cout << indent_str_next << "Area = " << scheu->area.get_area()*1e-6  << " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << scheu->power.readOp.dynamic*clockRate  << " W" << endl;
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? scheu->power.readOp.longer_channel_leakage:scheu->power.readOp.leakage)  << " W" << endl;
		cout << indent_str_next << "Gate Leakage = " << scheu->power.readOp.gate_leakage  << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << scheu->rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;
		if (plevel>3){
			// NOTE(review): the scheduler's displayEnergy signature is not visible
			// here; if it also takes (indent, plevel, is_tdp) this call has the
			// same argument mix-up as the register-file call had -- confirm.
			scheu->displayEnergy(indent+4,is_tdp);
		}
		// Functional units print their own sections at the same indent.
		exeu->displayEnergy(indent,is_tdp);
		if (coredynp.num_fpus>0)
		{
			fp_u->displayEnergy(indent,is_tdp);
		}
		if (coredynp.num_muls >0)
		{
			mul->displayEnergy(indent,is_tdp);
		}
		cout << indent_str << "Results Broadcast Bus:" << endl;
		cout << indent_str_next << "Area Overhead = " << bypass.area.get_area()*1e-6  << " mm^2" << endl;
		cout << indent_str_next << "Peak Dynamic = " << bypass.power.readOp.dynamic*clockRate  << " W" << endl;
		cout << indent_str_next << "Subthreshold Leakage = "
			<< (long_channel? bypass.power.readOp.longer_channel_leakage:bypass.power.readOp.leakage ) << " W" << endl;
		cout << indent_str_next << "Gate Leakage = " << bypass.power.readOp.gate_leakage  << " W" << endl;
		cout << indent_str_next << "Runtime Dynamic = " << bypass.rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout <<endl;
	}
	else
	{
		// Compact listing; label text (including spelling) is kept as-is
		// because it is program output.
		cout << indent_str_next << "Register Files    Peak Dynamic = " << rfu->rt_power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str_next << "Register Files    Subthreshold Leakage = " << rfu->rt_power.readOp.leakage <<" W" << endl;
		cout << indent_str_next << "Register Files    Gate Leakage = " << rfu->rt_power.readOp.gate_leakage << " W" << endl;
		cout << indent_str_next << "Instruction Sheduler   Peak Dynamic = " << scheu->rt_power.readOp.dynamic*clockRate  << " W" << endl;
		cout << indent_str_next << "Instruction Sheduler   Subthreshold Leakage = " << scheu->rt_power.readOp.leakage  << " W" << endl;
		cout << indent_str_next << "Instruction Sheduler   Gate Leakage = " << scheu->rt_power.readOp.gate_leakage  << " W" << endl;
		cout << indent_str_next << "Results Broadcast Bus   Peak Dynamic = " << bypass.rt_power.readOp.dynamic*clockRate  << " W" << endl;
		cout << indent_str_next << "Results Broadcast Bus   Subthreshold Leakage = " << bypass.rt_power.readOp.leakage  << " W" << endl;
		cout << indent_str_next << "Results Broadcast Bus   Gate Leakage = " << bypass.rt_power.readOp.gate_leakage  << " W" << endl;
	}

}






//Jingwen
/*
 * Core::compute
 *
 * Recomputes the core's runtime energy (`rt_power`) for the current
 * interval: refreshes the pipeline duty cycle from the XML statistics,
 * re-evaluates the IFU/LSU/MMU/EXU children in runtime mode, charges each
 * existing unit its share of the pipeline-register energy, then adds the
 * undifferentiated core, the private L2 (if configured) and the idle-core
 * energy.
 *
 * Terminates the process with exit(1) when cores are not homogeneous and
 * the per-core cycle count disagrees with the system cycle count.
 */
void Core::compute()
{
	//power_point_product_masks
	double pipe_mask[4]    = {1,1,1,1};
	double rtp_pipeline_coe;
	const double num_units = 4.0;   // IFU, LSU, EXU and MMU each take a quarter
	Pipeline_energy=0;

	//Set pipeline duty cycle for this interval
	coredynp.pipeline_duty_cycle=XML->sys.core[ithCore].pipeline_duty_cycle;

	rt_power.reset();
	ifu->rt_power.reset();
	lsu->rt_power.reset();
	mmu->rt_power.reset();
	exu->rt_power.reset();

	ifu->computeEnergy(false);
	lsu->computeEnergy(false);
	mmu->computeEnergy(false);
	exu->computeEnergy(false);

	if (XML->sys.homogeneous_cores==1)
	{
		rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores;
	}
	else
	{
		//Jingwen: the per-core and system cycle counts must agree.
		if (coredynp.total_cycles != XML->sys.total_cycles)
		{
			cout << "total cycle not match!" << endl;
			exit(1);
		}
		rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles;
	}

	// Per-unit slice of the pipeline: the dynamic entry is additionally
	// weighted by the active-cycle coefficient.
	const double dyn_share  = coredynp.num_pipelines*rtp_pipeline_coe/num_units;
	const double unit_share = coredynp.num_pipelines/num_units;
	set_pppm(pipe_mask, dyn_share, unit_share, unit_share, unit_share);

	// Pipeline dynamic energy attributed to one existing unit.
	const double unit_pipe_energy = corepipe->power.readOp.dynamic* (dyn_share);

	if (ifu->exist)
	{
		Pipeline_energy += unit_pipe_energy;
		ifu->rt_power = ifu->rt_power + corepipe->power*pipe_mask;
		rt_power      = rt_power + ifu->rt_power;
	}
	if (lsu->exist)
	{
		Pipeline_energy += unit_pipe_energy;
		lsu->rt_power = lsu->rt_power + corepipe->power*pipe_mask;
		rt_power      = rt_power + lsu->rt_power;
	}
	if (exu->exist)
	{
		Pipeline_energy += unit_pipe_energy;
		exu->rt_power = exu->rt_power + corepipe->power*pipe_mask;
		rt_power      = rt_power + exu->rt_power;
	}
	if (mmu->exist)
	{
		Pipeline_energy += unit_pipe_energy;
		mmu->rt_power = mmu->rt_power + corepipe->power*pipe_mask;
		rt_power      = rt_power + mmu->rt_power;
	}

	rt_power = rt_power + undiffCore->power;

	if (XML->sys.Private_L2)
	{
		l2cache->computeEnergy(false);
		rt_power = rt_power + l2cache->rt_power;
	}

	// Idle cores contribute (count * per-core idle power * execution time)
	// to the dynamic runtime term.
	IdleCoreEnergy=XML->sys.num_idle_cores * XML->sys.idle_core_power* executionTime;
	rt_power.readOp.dynamic += IdleCoreEnergy;
}




/*
 * Core::computeEnergy
 *
 * Evaluates the core's children and accumulates their results, together
 * with each unit's share of the pipeline-register energy, into `power`
 * (is_tdp == true) or `rt_power` (is_tdp == false).
 *
 * The pipeline is split evenly across 4 units (IFU, LSU, EXU, MMU), or
 * across 5 when the core type is OOO and the renaming unit is evaluated
 * as well.  Pipeline_energy is rebuilt from zero on every call.
 */
void Core::computeEnergy(bool is_tdp)
{
	//power_point_product_masks
	double pipe_mask[4] = {1,1,1,1};
	double num_units    = 4.0;
	Pipeline_energy=0;

	// Active-cycle coefficient used to weight the pipeline's dynamic energy.
	double rtp_pipeline_coe;
	if (XML->sys.homogeneous_cores==1)
		rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores;
	else
		rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles;

	if (!is_tdp)
	{
		// ---- runtime path: accumulate into rt_power ----
		rt_power.reset();

		ifu->computeEnergy(is_tdp);
		lsu->computeEnergy(is_tdp);
		mmu->computeEnergy(is_tdp);
		exu->computeEnergy(is_tdp);

		if (coredynp.core_ty==OOO)
		{
			num_units = 5.0;
			rnu->computeEnergy(is_tdp);
			// NOTE(review): unlike the non-OOO case below, the dynamic entry of
			// the mask is not weighted by rtp_pipeline_coe here -- confirm intended.
			const double ooo_share = coredynp.num_pipelines/num_units;
			set_pppm(pipe_mask, ooo_share, ooo_share, ooo_share, ooo_share);
			if (rnu->exist)
			{
				rnu->rt_power = rnu->rt_power + corepipe->power*pipe_mask;
				rt_power      = rt_power + rnu->rt_power;
			}
		}
		else
		{
			set_pppm(pipe_mask, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
		}

		// Pipeline dynamic energy attributed to one existing unit.
		const double rt_unit_pipe_energy = corepipe->power.readOp.dynamic* (coredynp.num_pipelines*rtp_pipeline_coe/num_units);

		if (ifu->exist)
		{
			Pipeline_energy += rt_unit_pipe_energy;
			ifu->rt_power = ifu->rt_power + corepipe->power*pipe_mask;
			rt_power      = rt_power + ifu->rt_power;
		}
		if (lsu->exist)
		{
			Pipeline_energy += rt_unit_pipe_energy;
			lsu->rt_power = lsu->rt_power + corepipe->power*pipe_mask;
			rt_power      = rt_power + lsu->rt_power;
		}
		if (exu->exist)
		{
			Pipeline_energy += rt_unit_pipe_energy;
			exu->rt_power = exu->rt_power + corepipe->power*pipe_mask;
			rt_power      = rt_power + exu->rt_power;
		}
		if (mmu->exist)
		{
			Pipeline_energy += rt_unit_pipe_energy;
			mmu->rt_power = mmu->rt_power + corepipe->power*pipe_mask;
			rt_power      = rt_power + mmu->rt_power;
		}

		rt_power = rt_power + undiffCore->power;

		if (XML->sys.Private_L2)
		{
			l2cache->computeEnergy(is_tdp);
			rt_power = rt_power + l2cache->rt_power;
		}
		return;
	}

	// ---- TDP path: accumulate into power ----
	ifu->computeEnergy(is_tdp);
	lsu->computeEnergy(is_tdp);
	mmu->computeEnergy(is_tdp);
	exu->computeEnergy(is_tdp);

	if (coredynp.core_ty==OOO)
	{
		num_units = 5.0;
		rnu->computeEnergy(is_tdp);
		const double ooo_share = coredynp.num_pipelines/num_units;
		set_pppm(pipe_mask, ooo_share, ooo_share, ooo_share, ooo_share);
		if (rnu->exist)
		{
			rnu->power = rnu->power + corepipe->power*pipe_mask;
			power      = power + rnu->power;
		}
	}

	// Computed after the OOO adjustment so both use the final unit count.
	const double unit_share       = coredynp.num_pipelines/num_units;
	const double unit_pipe_energy = corepipe->power.readOp.dynamic* (coredynp.num_pipelines*rtp_pipeline_coe/num_units);

	// For each existing unit the dynamic entry of the mask is the unit share
	// times that unit's duty cycle; the remaining entries are the plain share.
	if (ifu->exist)
	{
		Pipeline_energy += unit_pipe_energy;
		set_pppm(pipe_mask, unit_share*coredynp.IFU_duty_cycle, unit_share, unit_share, unit_share);
		ifu->power = ifu->power + corepipe->power*pipe_mask;
		power      = power + ifu->power;
	}
	if (lsu->exist)
	{
		Pipeline_energy += unit_pipe_energy;
		set_pppm(pipe_mask, unit_share*coredynp.LSU_duty_cycle, unit_share, unit_share, unit_share);
		lsu->power = lsu->power + corepipe->power*pipe_mask;
		power      = power + lsu->power;
	}
	if (exu->exist)
	{
		Pipeline_energy += unit_pipe_energy;
		set_pppm(pipe_mask, unit_share*coredynp.ALU_duty_cycle, unit_share, unit_share, unit_share);
		exu->power = exu->power + corepipe->power*pipe_mask;
		power      = power + exu->power;
	}
	if (mmu->exist)
	{
		Pipeline_energy += unit_pipe_energy;
		// MMU activity is taken as 0.5 + 0.5 * the LSU duty cycle.
		set_pppm(pipe_mask, unit_share*(0.5+0.5*coredynp.LSU_duty_cycle), unit_share, unit_share, unit_share);
		mmu->power = mmu->power + corepipe->power*pipe_mask;
		power      = power + mmu->power;
	}

	power = power + undiffCore->power;

	if (XML->sys.Private_L2)
	{
		l2cache->computeEnergy(is_tdp);
		// Scale the L2's first (dynamic) mask entry by the L2-to-core clock ratio.
		set_pppm(pipe_mask,l2cache->cachep.clockRate/clockRate, 1,1,1);
		power = power + l2cache->power*pipe_mask;
	}
}

void Core::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
	string indent_str(indent, ' ');
	string indent_str_next(indent+2, ' ');
	bool long_channel = XML->sys.longer_channel_device;
	if (is_tdp)
	{
		cout << "Core:" << endl;
		cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
		cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl;
		cout << indent_str << "Subthreshold Leakage = "
			<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
		//cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
		cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
		cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl;
		cout<<endl;
		if (ifu->exist)
		{
			cout << indent_str << "Instruction Fetch Unit:" << endl;
			cout << indent_str_next << "Area = " << ifu->area.get_area()*1e-6<< " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? ifu->power.readOp.longer_channel_leakage:ifu->power.readOp.leakage) <<" W" << endl;
			//cout << indent_str_next << "Subthreshold Leakage = " << ifu->power.readOp.longer_channel_leakage <<" W" << endl;
			cout << indent_str_next << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << ifu->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;
			if (plevel >2){
				ifu->displayEnergy(indent+4,plevel,is_tdp);
			}
		}
		if (coredynp.core_ty==OOO)
		{
			if (rnu->exist)
			{
				cout << indent_str<< "Renaming Unit:" << endl;
				cout << indent_str_next << "Area = " << rnu->area.get_area()*1e-6  << " mm^2" << endl;
				cout << indent_str_next << "Peak Dynamic = " << rnu->power.readOp.dynamic*clockRate  << " W" << endl;
				cout << indent_str_next << "Subthreshold Leakage = "
					<< (long_channel? rnu->power.readOp.longer_channel_leakage:rnu->power.readOp.leakage)  << " W" << endl;
				//cout << indent_str_next << "Subthreshold Leakage = " << rnu->power.readOp.longer_channel_leakage  << " W" << endl;
				cout << indent_str_next << "Gate Leakage = " << rnu->power.readOp.gate_leakage  << " W" << endl;
				cout << indent_str_next << "Runtime Dynamic = " << rnu->rt_power.readOp.dynamic/executionTime << " W" << endl;
				cout <<endl;
				if (plevel >2){
					rnu->displayEnergy(indent+4,plevel,is_tdp);
				}
			}

		}
		if (lsu->exist)
		{
			cout << indent_str<< "Load Store Unit:" << endl;
			cout << indent_str_next << "Area = " << lsu->area.get_area()*1e-6  << " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << lsu->power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? lsu->power.readOp.longer_channel_leakage:lsu->power.readOp.leakage ) << " W" << endl;
			//cout << indent_str_next << "Subthreshold Leakage = " << lsu->power.readOp.longer_channel_leakage  << " W" << endl;
			cout << indent_str_next << "Gate Leakage = " << lsu->power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << lsu->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;
			if (plevel >2){
				lsu->displayEnergy(indent+4,plevel,is_tdp);
			}
		}
		if (mmu->exist)
		{
			cout << indent_str<< "Memory Management Unit:" << endl;
			cout << indent_str_next << "Area = " << mmu->area.get_area() *1e-6 << " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << mmu->power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? mmu->power.readOp.longer_channel_leakage:mmu->power.readOp.leakage)   << " W" << endl;
			//cout << indent_str_next << "Subthreshold Leakage = " << mmu->power.readOp.longer_channel_leakage   << " W" << endl;
			cout << indent_str_next << "Gate Leakage = " << mmu->power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << mmu->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;
			if (plevel >2){
				mmu->displayEnergy(indent+4,plevel,is_tdp);
			}
		}
		if (exu->exist)
		{
			cout << indent_str<< "Execution Unit:" << endl;
			cout << indent_str_next << "Area = " << exu->area.get_area()  *1e-6<< " mm^2" << endl;
			cout << indent_str_next << "Peak Dynamic = " << exu->power.readOp.dynamic*clockRate  << " W" << endl;
			cout << indent_str_next << "Peak Dynamic Energy = " << exu->power.readOp.dynamic  << " W" << endl;
			cout << indent_str_next << "clock Rate = " << clockRate  << " W" << endl;

			cout << indent_str_next << "Subthreshold Leakage = "
				<< (long_channel? exu->power.readOp.longer_channel_leakage:exu->power.readOp.leakage)   << " W" << endl;
			//cout << indent_str_next << "Subthreshold Leakage = " << exu->power.readOp.longer_channel_leakage << " W" << endl;
			cout << indent_str_next << "Gate Leakage = " << exu->power.readOp.gate_leakage  << " W" << endl;
			cout << indent_str_next << "Runtime Dynamic = " << exu->rt_power.readOp.dynamic/executionTime << " W" << endl;
			cout <<endl;
			if (plevel >2){
				exu->displayEnergy(indent+4,plevel,is_tdp);
			}
		}
//		if (plevel >2)
//		{
//			if (undiffCore->exist)
//			{
//				cout << indent_str << "Undifferentiated Core" << endl;
//				cout << indent_str_next << "Area = " << undiffCore->area.get_area()*1e-6<< " mm^2" << endl;
//				cout << indent_str_next << "Peak Dynamic = " << undiffCore->power.readOp.dynamic*clockRate << " W" << endl;
////				cout << indent_str_next << "Subthreshold Leakage = " << undiffCore->power.readOp.leakage <<" W" << endl;
//				cout << indent_str_next << "Subthreshold Leakage = "
//								<< (long_channel? undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage)   << " W" << endl;
//				cout << indent_str_next << "Gate Leakage = " << undiffCore->power.readOp.gate_leakage << " W" << endl;
//				//		cout << indent_str_next << "Runtime Dynamic = " << undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl;
//				cout <<endl;
//			}
//		}
		if (XML->sys.Private_L2)
		{

			l2cache->displayEnergy(4,is_tdp);
		}

		cout << indent_str<< "Idle Core: " << endl;
			cout << indent_str_next << "Runtime Dynamic = " << IdleCoreEnergy/executionTime << " W\n" << endl;

	}
	else
	{
//		cout << indent_str_next << "Instruction Fetch Unit    Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl;
//		cout << indent_str_next << "Instruction Fetch Unit    Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl;
//		cout << indent_str_next << "Instruction Fetch Unit    Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl;
//		cout << indent_str_next << "Load Store Unit   Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate  << " W" << endl;
//		cout << indent_str_next << "Load Store Unit   Subthreshold Leakage = " << lsu->rt_power.readOp.leakage  << " W" << endl;
//		cout << indent_str_next << "Load Store Unit   Gate Leakage = " << lsu->rt_power.readOp.gate_leakage  << " W" << endl;
//		cout << indent_str_next << "Memory Management Unit   Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate  << " W" << endl;
//		cout << indent_str_next << "Memory Management Unit   Subthreshold Leakage = " << mmu->rt_power.readOp.leakage  << " W" << endl;
//		cout << indent_str_next << "Memory Management Unit   Gate Leakage = " << mmu->rt_power.readOp.gate_leakage  << " W" << endl;
//		cout << indent_str_next << "Execution Unit   Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate  << " W" << endl;
//		cout << indent_str_next << "Execution Unit   Subthreshold Leakage = " << exu->rt_power.readOp.leakage  << " W" << endl;
//		cout << indent_str_next << "Execution Unit   Gate Leakage = " << exu->rt_power.readOp.gate_leakage  << " W" << endl;
	}
}
// Destructor: releases the objects referenced by the fetch unit's member
// pointers (IB, ID_inst, ID_operand, ID_misc and, when a prediction width is
// configured, BTB and BPT). Does nothing when the unit is flagged as absent.
InstFetchU::~InstFetchU()
{
	if (exist)
	{
		// delete on a null pointer is a no-op, so no explicit null tests are needed.
		delete IB;
		IB = 0;
		delete ID_inst;
		ID_inst = 0;
		delete ID_operand;
		ID_operand = 0;
		delete ID_misc;
		ID_misc = 0;

		// BTB and BPT are released only when predictionW is positive.
		if (coredynp.predictionW > 0)
		{
			delete BTB;
			BTB = 0;
			delete BPT;
			BPT = 0;
		}
	}
}

// Destructor: releases the predictor tables, the chooser and the RAS that this
// object points to. Does nothing when the predictor is flagged as absent.
BranchPredictor::~BranchPredictor()
{
	if (exist)
	{
		// delete on a null pointer is a no-op, so no explicit null tests are needed.
		delete globalBPT;
		globalBPT = 0;
		delete localBPT;
		localBPT = 0;
		delete L1_localBPT;
		L1_localBPT = 0;
		delete L2_localBPT;
		L2_localBPT = 0;
		delete chooser;
		chooser = 0;
		delete RAS;
		RAS = 0;
	}
}

// Destructor: releases every object referenced by the renaming unit's member
// pointers and resets each pointer to 0. Does nothing when the unit is flagged
// as absent.
RENAMINGU ::~RENAMINGU(){

	if (!exist) return;
	if(iFRAT)                  {delete iFRAT; iFRAT = 0;}
	if(fFRAT)                  {delete fFRAT; fFRAT = 0;}
	if(iRRAT)                  {delete iRRAT; iRRAT = 0;}
	// This line previously repeated the iFRAT check (a no-op after the first
	// release), so fRRAT was never freed.
	// NOTE(review): assumes RENAMINGU declares fRRAT alongside
	// iFRAT/fFRAT/iRRAT -- confirm against the class declaration.
	if(fRRAT)                  {delete fRRAT; fRRAT = 0;}
	if(ifreeL)                 {delete ifreeL; ifreeL = 0;}
	if(ffreeL)                 {delete ffreeL; ffreeL = 0;}
	if(idcl)                   {delete idcl; idcl = 0;}
	if(fdcl)                   {delete fdcl; fdcl = 0;}
	if(RAHT)                   {delete RAHT; RAHT = 0;}
}

// Destructor: releases the object referenced by LSQ. Does nothing when the
// unit is flagged as absent.
LoadStoreU::~LoadStoreU()
{
	if (exist)
	{
		// delete on a null pointer is a no-op, so no explicit null test is needed.
		delete LSQ;
		LSQ = 0;
	}
}

// Destructor: releases the objects referenced by itlb and dtlb. Does nothing
// when the unit is flagged as absent.
MemManU::~MemManU()
{
	if (exist)
	{
		// delete on a null pointer is a no-op, so no explicit null tests are needed.
		delete itlb;
		itlb = 0;
		delete dtlb;
		dtlb = 0;
	}
}

// Destructor: releases the objects referenced by IRF, FRF and RFWIN. Does
// nothing when the unit is flagged as absent.
RegFU::~RegFU()
{
	if (exist)
	{
		// delete on a null pointer is a no-op, so no explicit null tests are needed.
		delete IRF;
		IRF = 0;
		delete FRF;
		FRF = 0;
		delete RFWIN;
		RFWIN = 0;
	}
}

// Destructor: releases the instruction windows, the ROB and the
// instruction-selection logic referenced by this scheduler, resetting each
// pointer to 0. Does nothing when the unit is flagged as absent.
SchedulerU ::~SchedulerU(){

	if (!exist) return;
	if(int_inst_window)        {delete int_inst_window; int_inst_window = 0;}
	// Previously this branch deleted int_inst_window a second time (already
	// null by then), so fp_inst_window was never released.
	if(fp_inst_window)         {delete fp_inst_window; fp_inst_window = 0;}
	if(ROB)                    {delete ROB; ROB = 0;}
	if(instruction_selection)  {delete instruction_selection; instruction_selection = 0;}
}

// Destructor: releases the bypass structures, the functional units, the
// register-file unit and the scheduler unit referenced by this execution unit,
// in the order listed below. Does nothing when the unit is flagged as absent.
EXECU::~EXECU()
{
	if (exist)
	{
		// delete on a null pointer is a no-op, so no explicit null tests are needed.
		delete int_bypass;
		int_bypass = 0;
		delete intTagBypass;
		intTagBypass = 0;
		delete int_mul_bypass;
		int_mul_bypass = 0;
		delete intTag_mul_Bypass;
		intTag_mul_Bypass = 0;
		delete fp_bypass;
		fp_bypass = 0;
		delete fpTagBypass;
		fpTagBypass = 0;

		delete fp_u;
		fp_u = 0;
		delete exeu;
		exeu = 0;
		delete mul;
		mul = 0;
		delete rfu;
		rfu = 0;
		delete scheu;
		scheu = 0;
	}
}

// Destructor: releases the per-core units (ifu, lsu, rnu, mmu, exu), the
// pipeline model, the undifferentiated-core model and the l2cache object
// referenced by this core, in the order listed below.
Core::~Core()
{
	// delete on a null pointer is a no-op, so no explicit null tests are needed.
	delete ifu;
	ifu = 0;
	delete lsu;
	lsu = 0;
	delete rnu;
	rnu = 0;
	delete mmu;
	mmu = 0;
	delete exu;
	exu = 0;
	delete corepipe;
	corepipe = 0;
	delete undiffCore;
	undiffCore = 0;
	delete l2cache;
	l2cache = 0;
}

// Fills coredynp for core number ithCore from the parsed XML configuration.
// After copying the raw settings it validates the core, scheduler and renaming
// type enums (printing a message and calling exit(0) on an unrecognised
// value), derives the physical register widths and free-list sizes for OOO
// cores, and finally sets the fixed constants, clock rate, execution time and
// the multithreaded leakage scaling vector.
void Core::set_core_param()
{
	// General core organisation and pipeline widths.
	coredynp.opt_local    = XML->sys.core[ithCore].opt_local;
	coredynp.x86          = XML->sys.core[ithCore].x86;
	coredynp.Embedded     = XML->sys.Embedded;
	coredynp.core_ty      = static_cast<enum Core_type>(XML->sys.core[ithCore].machine_type);
	coredynp.rm_ty        = static_cast<enum Renaming_type>(XML->sys.core[ithCore].rename_scheme);
	coredynp.fetchW       = XML->sys.core[ithCore].fetch_width;
	coredynp.decodeW      = XML->sys.core[ithCore].decode_width;
	coredynp.issueW       = XML->sys.core[ithCore].issue_width;
	coredynp.peak_issueW  = XML->sys.core[ithCore].peak_issue_width;
	coredynp.commitW      = XML->sys.core[ithCore].commit_width;
	// NOTE(review): peak_commitW is read from peak_issue_width and fp_decodeW
	// from fp_issue_width -- confirm this is intended.
	coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width;
	coredynp.predictionW  = XML->sys.core[ithCore].prediction_width;
	coredynp.fp_issueW    = XML->sys.core[ithCore].fp_issue_width;
	coredynp.fp_decodeW   = XML->sys.core[ithCore].fp_issue_width;
	coredynp.num_alus     = XML->sys.core[ithCore].ALU_per_core;
	coredynp.num_fpus     = XML->sys.core[ithCore].FPU_per_core;
	coredynp.num_muls     = XML->sys.core[ithCore].MUL_per_core;

	// Threading, instruction format and program-counter width.
	coredynp.num_hthreads       = XML->sys.core[ithCore].number_hardware_threads;
	coredynp.multithreaded      = (coredynp.num_hthreads > 1);
	coredynp.instruction_length = XML->sys.core[ithCore].instruction_length;
	coredynp.pc_width           = XML->sys.virtual_address_width;

	// Opcode sizes, pipeline shape ([0] = integer, [1] = floating point) and
	// data/address widths. The integer data width is machine_bits rounded up
	// to a multiple of 32; the FP data width reuses the same value.
	coredynp.opcode_length       = XML->sys.core[ithCore].opcode_width;
	coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width;
	coredynp.num_pipelines       = XML->sys.core[ithCore].pipelines_per_core[0];
	coredynp.pipeline_stages     = XML->sys.core[ithCore].pipeline_depth[0];
	coredynp.num_fp_pipelines    = XML->sys.core[ithCore].pipelines_per_core[1];
	coredynp.fp_pipeline_stages  = XML->sys.core[ithCore].pipeline_depth[1];
	coredynp.int_data_width      = static_cast<int>(ceil(XML->sys.machine_bits / 32.0)) * 32;
	coredynp.fp_data_width       = coredynp.int_data_width;
	coredynp.v_address_width     = XML->sys.virtual_address_width;
	coredynp.p_address_width     = XML->sys.physical_address_width;

	// Scheduler scheme, architectural register files and cycle counts.
	coredynp.scheu_ty            = static_cast<enum Scheduler_type>(XML->sys.core[ithCore].instruction_window_scheme);
	coredynp.arch_ireg_width     = static_cast<int>(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size)));
	coredynp.arch_freg_width     = static_cast<int>(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size)));
	coredynp.num_IRF_entry       = XML->sys.core[ithCore].archi_Regs_IRF_size;
	coredynp.num_FRF_entry       = XML->sys.core[ithCore].archi_Regs_FRF_size;
	coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle;
	coredynp.total_cycles        = XML->sys.core[ithCore].total_cycles;
	coredynp.busy_cycles         = XML->sys.core[ithCore].busy_cycles;
	coredynp.idle_cycles         = XML->sys.core[ithCore].idle_cycles;

	// Max-power duty cycles for peak power estimation. These are always taken
	// from the XML input, whatever the core type.
	coredynp.IFU_duty_cycle       = XML->sys.core[ithCore].IFU_duty_cycle;
	coredynp.BR_duty_cycle        = XML->sys.core[ithCore].BR_duty_cycle;
	coredynp.LSU_duty_cycle       = XML->sys.core[ithCore].LSU_duty_cycle;
	coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle;
	coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle;
	coredynp.ALU_duty_cycle       = XML->sys.core[ithCore].ALU_duty_cycle;
	coredynp.MUL_duty_cycle       = XML->sys.core[ithCore].MUL_duty_cycle;
	coredynp.FPU_duty_cycle       = XML->sys.core[ithCore].FPU_duty_cycle;
	coredynp.ALU_cdb_duty_cycle   = XML->sys.core[ithCore].ALU_cdb_duty_cycle;
	coredynp.MUL_cdb_duty_cycle   = XML->sys.core[ithCore].MUL_cdb_duty_cycle;
	coredynp.FPU_cdb_duty_cycle   = XML->sys.core[ithCore].FPU_cdb_duty_cycle;

	// Reject unrecognised enum values. The scheduler and renaming checks run
	// for every core type, not only OOO.
	if (coredynp.core_ty != OOO && coredynp.core_ty != Inorder)
	{
		cout << "Invalid Core Type" << endl;
		exit(0);
	}
	if (coredynp.scheu_ty != PhysicalRegFile && coredynp.scheu_ty != ReservationStation)
	{
		cout << "Invalid OOO Scheduler Type" << endl;
		exit(0);
	}
	if (coredynp.rm_ty != RAMbased && coredynp.rm_ty != CAMbased)
	{
		cout << "Invalid OOO Renaming Type" << endl;
		exit(0);
	}

	// OOO cores: derive physical register tag widths and free-list sizes.
	if (coredynp.core_ty == OOO)
	{
		switch (coredynp.scheu_ty)
		{
		case PhysicalRegFile:
			coredynp.phy_ireg_width = static_cast<int>(ceil(log2(XML->sys.core[ithCore].phy_Regs_IRF_size)));
			coredynp.phy_freg_width = static_cast<int>(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size)));
			// The register-file entry counts set earlier from the
			// architectural sizes are replaced by the physical sizes here.
			coredynp.num_IRF_entry         = XML->sys.core[ithCore].phy_Regs_IRF_size;
			coredynp.num_ifreelist_entries = coredynp.num_IRF_entry;
			coredynp.num_FRF_entry         = XML->sys.core[ithCore].phy_Regs_FRF_size;
			coredynp.num_ffreelist_entries = coredynp.num_FRF_entry;
			break;

		case ReservationStation:
			// ROB serves as Phy RF in RS based OOO
			coredynp.phy_ireg_width        = static_cast<int>(ceil(log2(XML->sys.core[ithCore].ROB_size)));
			coredynp.phy_freg_width        = static_cast<int>(ceil(log2(XML->sys.core[ithCore].ROB_size)));
			coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size;
			coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size;
			break;

		default:
			break;
		}
	}

	// Fixed constants. Best check pointing entries for a 4~8 issue OOO should
	// be 16~48; see TR for reference.
	coredynp.globalCheckpoint   = 32;
	coredynp.perThreadState     = 8;
	// NOTE(review): this overwrites the instruction_length read from the XML
	// input above -- confirm the fixed value of 32 is intended.
	coredynp.instruction_length = 32;

	// clock_rate from the XML input is scaled by 1e6 before use.
	coredynp.clockRate  = XML->sys.core[ithCore].clock_rate;
	coredynp.clockRate *= 1e6;

	// Register windowing is enabled only for in-order cores with a positive
	// register window size.
	coredynp.regWindowing  = (XML->sys.core[ithCore].register_windows_size > 0
	                          && coredynp.core_ty == Inorder);
	coredynp.executionTime = XML->sys.total_cycles / coredynp.clockRate;

	set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, coredynp.num_hthreads, 0);
}
